def generate_from_metadata(file, num_tracks): """Return track id's by looking up the name on music brainz Args: fname: The file containing the track in question. Yields: A set of track_id, by querying based on id3 tags """ album = file.getMDAlbumTitle() title = file.getMDTrackTitle() artist = file.getMDTrackArtist() if album is None or title is None or artist is None: return # Can't get metadata util.update_progress("Searching albums by text lookup: " + ` album ` + " " + ` artist `) for i in flatten( util.combinations(lookups.get_releases_by_cdtext, album, artist, num_tracks)): release = lookups.get_release_by_releaseid(i.release.id) util.update_progress("Trying " + release.title + " by text lookup") for trackind in range(len(release.tracks)): rtrackname = release.tracks[trackind].title if type(title) != type([]): title = [title] for t in title: if util.comp_name(rtrackname, t): print "Using album based text comparison for", artist.strip( ), album.strip(), "'s track", trackind + 1, ` rtrackname ` yield lookups.get_track_by_id(release.tracks[trackind].id) else: print "Failed text lookup for %s" % t
def test_worker(model, sess, subset='kpval'): # build data reader reader = TestReader(batch_size=32, subset=subset, use_fb_data=FLAGS.use_fb_data) quest_ids = [] result = [] tf.logging.info('\nRunning inference on split %s...' % subset) for i in range(reader.num_batches): if i % 10 == 0: update_progress(i / float(reader.num_batches)) outputs = reader.get_test_batch() mc_scores = sess.run(model._logits, feed_dict=model.fill_feed_dict(outputs[:-3])) choice_idx = np.argmax(mc_scores, axis=1) cands, _qids, image_ids = outputs[-3:] for qid, cid, mcs in zip(_qids, choice_idx, cands): answer = mcs['cands'][cid] assert (mcs['quest_id'] == qid) result.append({u'answer': answer, u'question_id': qid}) quest_ids.append(_qids) return quest_ids, result
def display(): if 'access_token' not in session: abort(400) access_token = session['access_token'] if 'job' in session: job = get_job_from_key(session['job'], conn) # Only rely on a previous result if the same user is logged in (same access_token) if job is not None and access_token == job.meta.get('access_token', None): return render_template('display.html', username=session['username'], quota=session['quota'], used=session['used']) try: client = Dropbox(access_token) except Exception: abort(401) account = client.users_get_current_account() session['username'] = account.name.display_name space_usage = client.users_get_space_usage() allocated, used = get_space_usage_info(space_usage) total_bytes = used session['used'] = human_readable(used) session['quota'] = human_readable(allocated) job = q.enqueue(walk_entire_dropbox, access_token, total_bytes) job.meta['access_token'] = access_token job.save() update_progress(job, 0, "/") session['job'] = job.key return render_template('display.html', username=session['username'], quota=session['quota'], used=session['used'])
def fit(self): kmeans = MiniBatchKMeans(self.nr_centroids, init='k-means++') for it in range(self.nr_it): print "Iteration {0} out of {1}".format(it, self.nr_it) batches = randbr.RandomBatchReader() maxIterations = batches.nbatches for i, batch in enumerate(batches): if self.rotational_invariant_training: # rotate the batch 90, 180 and 270 degrees batch90 = self.rotate_patches_90_degrees(batch,1) batch180 = self.rotate_patches_90_degrees(batch,2) batch270 = self.rotate_patches_90_degrees(batch,3) batch = np.concatenate((batch, np.concatenate((batch90, np.concatenate((batch180,batch270)))))) if self.mirror_invariant_training: # mirror the batch left-right, and up-down batchlr = self.mirror_patches(batch, "lr") batchud = self.mirror_patches(batch, "ud") batch = np.concatenate((batch, np.concatenate((batchlr,batchud)))) # Training util.update_progress((i+(it*maxIterations))/float(maxIterations*self.nr_it)) kmeans.partial_fit(batch) util.update_progress(1.0) print "fitting done" return kmeans.cluster_centers_
def extract_stats(filepaths, image_size, square_function): print "Calculating mean, std and var of all images" #Running total (sum) of all images count_so_far = 0 mean = np.zeros((image_size, image_size)) M2 = np.zeros((image_size, image_size)) n = len(filepaths) for i, filepath in enumerate(filepaths): image = misc.imread(filepath, flatten=1) image = process(image, square_function, image_size) # Online statistics count_so_far = count_so_far + 1 delta = image - mean mean = mean + delta / count_so_far M2 = M2 + delta * (image - mean) if i % 50 == 0: util.update_progress(i / float(n)) util.update_progress(1.0) mean_image = mean variance_image = M2 / (n - 1) std_image = np.sqrt(variance_image) print "Plotting mean image (only shows afterwards)" util.plot(mean_image, invert=True) return mean_image, variance_image, std_image
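# Note: extract_stats above accumulates mean and M2 with Welford's online algorithm; the per-pixel variance is recovered at the end as M2 / (n - 1) once every image has been seen.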
def test(checkpoint_path=None): batch_size = 100 config = ModelConfig() # Get model function model_fn = get_model_creation_fn(FLAGS.model_type) # build data reader reader = AttentionFetcher(batch_size=batch_size, subset=TEST_SET, feat_type=config.feat_type, version=FLAGS.version) if checkpoint_path is None: ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)) checkpoint_path = ckpt.model_checkpoint_path print(checkpoint_path) # build and restore model model = model_fn(config, phase='test') model.build() prob = model.prob sess = tf.Session(graph=tf.get_default_graph()) tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path)) saver = tf.train.Saver() saver.restore(sess, checkpoint_path) # Create the vocabulary. top_ans_file = '../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt' to_sentence = SentenceGenerator(trainset='trainval', top_ans_file=top_ans_file) ans_ids = [] quest_ids = [] print('Running inference on split %s...' % TEST_SET) for i in range(reader.num_batches): if i % 10 == 0: update_progress(i / float(reader.num_batches)) outputs = reader.get_test_batch() generated_ans = sess.run( prob, feed_dict=model.fill_feed_dict(outputs[:-2])) generated_ans[:, -1] = 0 top_ans = np.argmax(generated_ans, axis=1) ans_ids.append(top_ans) quest_id = outputs[-2] quest_ids.append(quest_id) quest_ids = np.concatenate(quest_ids) ans_ids = np.concatenate(ans_ids) result = [{u'answer': to_sentence.index_to_top_answer(aid), u'question_id': qid} for aid, qid in zip(ans_ids, quest_ids)] # save results tf.logging.info('Saving results') res_file = FLAGS.result_format % (FLAGS.version, TEST_SET) json.dump(result, open(res_file, 'w')) tf.logging.info('Done!') tf.logging.info('#Num eval samples %d' % len(result)) return res_file, quest_ids
def test(checkpoint_path=None): batch_size = 100 config = ModelConfig() # Get model function model_fn = get_model_creation_fn(FLAGS.model_type) # build data reader reader = AttentionFetcher(batch_size=batch_size, subset=TEST_SET, feat_type=config.feat_type, version=FLAGS.version) if checkpoint_path is None: ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)) checkpoint_path = ckpt.model_checkpoint_path print(checkpoint_path) # build and restore model model = model_fn(config, phase='test') # model.set_agent_ids([0]) model.build() prob = model.prob sess = tf.Session(graph=tf.get_default_graph()) tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path)) saver = tf.train.Saver() saver.restore(sess, checkpoint_path) # Create the vocabulary. top_ans_file = '../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt' to_sentence = SentenceGenerator(trainset='trainval', top_ans_file=top_ans_file) # to_sentence = SentenceGenerator(trainset='trainval') results = [] print('Running inference on split %s...' % TEST_SET) for i in range(reader.num_batches): if i % 10 == 0: update_progress(i / float(reader.num_batches)) outputs = reader.get_test_batch() generated_ans = sess.run(prob, feed_dict=model.fill_feed_dict(outputs[:-2])) generated_ans[:, -1] = 0 ans_cand_ids = np.argsort(-generated_ans, axis=1) quest_ids = outputs[-2] for quest_id, ids in zip(quest_ids, ans_cand_ids): answers = [] for k in range(_K): aid = ids[k] ans = to_sentence.index_to_top_answer(aid) answers.append(ans) res_i = {'question_id': int(quest_id), 'answers': answers} results.append(res_i) eval_recall(results)
def test(checkpoint_path=None): batch_size = 64 config = ModelConfig() config.sample_negative = FLAGS.sample_negative config.use_fb_bn = FLAGS.use_fb_bn # Get model function model_fn = get_model_creation_fn(FLAGS.model_type) # build data reader reader = TestReader(batch_size=batch_size, subset=TEST_SET, use_fb_data=FLAGS.use_fb_data) if checkpoint_path is None: ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)) checkpoint_path = ckpt.model_checkpoint_path print(checkpoint_path) # build and restore model model = model_fn(config, phase='test') model.build() prob = model.prob sess = tf.Session(graph=tf.get_default_graph()) tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path)) saver = tf.train.Saver() saver.restore(sess, checkpoint_path) quest_ids = [] result = [] print('Running inference on split %s...' % TEST_SET) for i in range(reader.num_batches): if i % 10 == 0: update_progress(i / float(reader.num_batches)) outputs = reader.get_test_batch() mc_scores = sess.run(model._logits, feed_dict=model.fill_feed_dict(outputs[:-3])) choice_idx = np.argmax(mc_scores, axis=1) cands, _qids, image_ids = outputs[-3:] for qid, cid, mcs in zip(_qids, choice_idx, cands): answer = mcs['cands'][cid] assert (mcs['quest_id'] == qid) result.append({u'answer': answer, u'question_id': qid}) quest_ids.append(_qids) quest_ids = np.concatenate(quest_ids) # save results tf.logging.info('Saving results') res_file = FLAGS.result_format % (FLAGS.version, TEST_SET) json.dump(result, open(res_file, 'w')) tf.logging.info('Done!') tf.logging.info('#Num eval samples %d' % len(result)) return res_file, quest_ids
def test(checkpoint_path=None): batch_size = 4 config = ModelConfig() # Get model function model_fn = get_model_creation_fn(FLAGS.model_type) # build data reader reader = AttentionFetcher(batch_size=batch_size, subset=TEST_SET, feat_type=config.feat_type, version=FLAGS.version) if checkpoint_path is None: ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)) checkpoint_path = ckpt.model_checkpoint_path print(checkpoint_path) # build and restore model model = model_fn(config, phase='test') model.build() prob = model.prob sess = tf.Session(graph=tf.get_default_graph()) tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path)) saver = tf.train.Saver() saver.restore(sess, checkpoint_path) # Create the vocabulary. to_sentence = SentenceGenerator( trainset='trainval', top_ans_file='../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt') ans_ids = [] quest_ids = [] print('Running inference on split %s...' % TEST_SET) for i in range(reader.num_batches): if i % 10 == 0: update_progress(i / float(reader.num_batches)) outputs = reader.get_test_batch() generated_ans = sess.run(prob, feed_dict=model.fill_feed_dict(outputs[:-2])) generated_ans[:, -1] = 0 top_ans = np.argmax(generated_ans, axis=1) ans_ids.append(top_ans) quest_id = outputs[-2] quest_ids.append(quest_id) quest_ids = np.concatenate(quest_ids) ans_ids = np.concatenate(ans_ids) gt = reader._answer n1, n2 = (gt == ans_ids).sum(), gt.size acc = n1 / float(n2) print('\nAcc: %0.2f, %d/%d' % (acc * 100., n1, n2)) return acc
def test(checkpoint_path=None): batch_size = 128 # build data reader reader = Reader(batch_size=batch_size, subset=TEST_SET, phase='test', version='v1') if checkpoint_path is None: ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir % ('v1', 'Fusion')) checkpoint_path = ckpt.model_checkpoint_path print(checkpoint_path) # build and restore model model = RerankModel(phase='test', version='v1', num_cands=5) model.build() sess = tf.Session(graph=tf.get_default_graph()) tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path)) saver = tf.train.Saver() saver.restore(sess, checkpoint_path) # Create the vocabulary. to_sentence = SentenceGenerator(trainset='trainval', top_ans_file='../iccv_vaq/data/vqa_trainval_top2000_answers.txt') ans_ids = [] quest_ids = [] print('Running inference on split %s...' % TEST_SET) for i in range(reader.num_batches): if i % 10 == 0: update_progress(i / float(reader.num_batches)) outputs = reader.pop_batch() model_preds = sess.run(model.preds, feed_dict=model.fill_feed_dict(outputs)) local_index = model_preds.argmax(axis=1) # local_index = outputs[-3].argmax(axis=1) # ivqa # local_index = outputs[-4].argmax(axis=1) # vqa top_ans = np.array([cand[idx] for idx, cand in zip(local_index, outputs[3])]) ans_ids.append(top_ans) quest_id = outputs[-1] quest_ids.append(quest_id) ans_ids = np.concatenate(ans_ids) quest_ids = np.concatenate(quest_ids) result = [{u'answer': to_sentence.index_to_top_answer(aid), u'question_id': qid} for aid, qid in zip(ans_ids, quest_ids)] # save results tf.logging.info('Saving results') res_file = FLAGS.result_format % ('v1', TEST_SET) json.dump(result, open(res_file, 'w')) tf.logging.info('Done!') tf.logging.info('#Num eval samples %d' % len(result)) # ana_ctx.close() return res_file, quest_ids
def test(checkpoint_path=None): batch_size = 100 config = ModelConfig() # Get model function model_fn = get_model_creation_fn(FLAGS.model_type) # build data reader reader = AttentionFetcher(batch_size=batch_size, subset=TEST_SET, feat_type=config.feat_type, version=FLAGS.version) if checkpoint_path is None: print(FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)) ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)) checkpoint_path = ckpt.model_checkpoint_path print(checkpoint_path) # build and restore model model = model_fn(config, phase='test') # model.set_agent_ids([0]) model.build() prob = model.qrd_prob gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2) sess = tf.Session(graph=tf.get_default_graph(), config=tf.ConfigProto(gpu_options=gpu_options)) tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path)) saver = tf.train.Saver() saver.restore(sess, checkpoint_path) gts = [] preds = [] print('Running inference on split %s...' % TEST_SET) for i in range(reader.num_batches): if i % 10 == 0: update_progress(i / float(reader.num_batches)) outputs = reader.get_test_batch() scores = sess.run(prob, feed_dict=model.fill_feed_dict(outputs)) preds.append(scores.flatten()) gts.append(outputs[-1]) gts = np.concatenate(gts) preds = np.concatenate(preds) from scipy.io import savemat from sklearn.metrics import average_precision_score sv_file_name = os.path.basename(checkpoint_path) savemat('result/predictions_%s.mat' % sv_file_name, { 'gt': gts, 'preds': preds }) ap = average_precision_score(1.0 - gts, 1.0 - preds) return float(ap)
def test(checkpoint_path=None): batch_size = 1 config = ModelConfig() # Get model function model_fn = get_model_creation_fn(FLAGS.model_type) # build data reader reader = AttentionFetcher(batch_size=batch_size, subset=TEST_SET) if checkpoint_path is None: ckpt = tf.train.get_checkpoint_state( FLAGS.checkpoint_dir % (FLAGS.model_type, config.feat_type)) checkpoint_path = ckpt.model_checkpoint_path print(checkpoint_path) # build and restore model model = model_fn(config, phase='test') model.build() g_prob = model.prob g_att_map = model.attention_map # sess = tf.Session() sess = tf.Session(graph=tf.get_default_graph()) tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path)) saver = tf.train.Saver() saver.restore(sess, checkpoint_path) # Create the vocabulary. visualiser = PredictionVisualiser(FLAGS.model_type, do_plot=True) ans_ids = [] quest_ids = [] print('Running inference on split %s...' % TEST_SET) for i in range(reader.num_batches): if i % 100 == 0: update_progress(i / float(reader.num_batches)) outputs = reader.get_test_batch() if i < 100: continue generated_ans, att_map = sess.run([g_prob, g_att_map], feed_dict=model.fill_feed_dict( outputs[:-2])) # process attention map att_map = att_map.reshape([batch_size, 14, 14, -1]) att_map = np.transpose(att_map, [0, 3, 1, 2]) generated_ans[:, -1] = 0 top_ans = np.argmax(generated_ans, axis=1) gt_ans = outputs[3] ans_ids.append(top_ans) quest_id = outputs[-2] quest_ids.append(quest_id) if np.random.rand() > 0.05: visualiser.plot(quest_id, generated_ans, att_map)
def train(iteration_rounds=10, iterations=25000, filename='training'): print 'starting training' alpha_list = [] beta_list = [] for q in range(1, iteration_rounds + 1): alphas = np.ones(len(possible_pages)) betas = np.ones(len(possible_pages)) print 'Starting round ' + str(q) for i in range(0, iterations): randrunid = random.randint(0, 10000) randi = random.randint(0, 10000) page_index = beta_util.draw_from_beta_distributions( alphas=alphas, betas=betas, possible_pages=possible_pages) # which arm wins bla = 0 while bla < 50: try: response = responder.respond_with_page( i=randi, runid=randrunid, page=possible_pages[page_index], teampw=teampw) except Exception: bla = bla + 1 continue break success = response['effect']['Success'] # 1 or 0 alphas, betas = beta_util.update_alphas_betas( index=page_index, success=success, price=possible_pages[page_index]['price'], alphas=alphas, betas=betas) util.update_progress(i / float(iterations)) alpha_list.append(alphas) beta_list.append(betas) util.update_progress(1) print 'Round ' + str(q) + ' of ' + str(iteration_rounds) + ' finished.' alpha_list = np.array(alpha_list) beta_list = np.array(beta_list) final_alphas = alpha_list.mean(axis=0) final_betas = beta_list.mean(axis=0) beta_util.save_ab_to_filename(final_alphas, final_betas, name=filename) print 'training complete'
def add_new_track(release, possible_releases, fileid, track, trackinfo, impossible_releases): releaseid = release.id found_tracknumber = lookups.track_number(release.tracks, track) if releaseid in possible_releases: assert found_tracknumber not in possible_releases[releaseid] assert fileid not in possible_releases[releaseid].values(), ( fileid, possible_releases[releaseid]) possible_releases[releaseid][found_tracknumber] = fileid print "Found track", found_tracknumber, "(", release.tracks[ found_tracknumber - 1].title, ")", "of", release.title, ":", os.path.basename( fileid), "(tracks found: %s)\x1b[K" % (util.output_list( possible_releases[releaseid].keys())) return else: possible_releases[releaseid] = {found_tracknumber: fileid} util.update_progress( "Considering new %s - %s (found track %d)" % (release.artist.name, release.title, found_tracknumber)) # Right, lets see if we can find some other tracks quick smart for trackind in range(len(release.tracks)): # Don't waste time on things we've already found if (trackind + 1) in possible_releases[releaseid]: continue track = lookups.get_track_by_id(release.tracks[trackind].id) for fileid in trackinfo: if fileid in possible_releases[releaseid].values(): continue if trackinfo[fileid].getPUID() in track.puids: # yay, found one. if verify_track(release, possible_releases, impossible_releases, trackinfo, fileid, track): possible_releases[releaseid][trackind + 1] = fileid util.update_progress( " Also found track %02d: %s" % (trackind + 1, release.tracks[trackind].title)) break print " Found tracks: %s" % (util.output_list( possible_releases[releaseid].keys())), if util.list_difference(range(1, len(release.tracks) + 1), possible_releases[releaseid].keys()): print " Missing tracks: %s" % (util.output_list( util.list_difference(range(1, len(release.tracks) + 1), possible_releases[releaseid].keys()))) else: print
def test(checkpoint_path=None): batch_size = 100 config = ModelConfig() # Get model function model_fn = get_model_creation_fn(FLAGS.model_type) # build data reader reader = Reader(batch_size=batch_size, subset=TEST_SET, feat_type=config.feat_type, version=FLAGS.version) if checkpoint_path is None: ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)) checkpoint_path = ckpt.model_checkpoint_path print(checkpoint_path) # build and restore model model = model_fn(config, phase='test') # model.set_agent_ids([0]) model.build() prob = model.prob sess = tf.Session(graph=tf.get_default_graph()) tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path)) saver = tf.train.Saver() saver.restore(sess, checkpoint_path) # Create the vocabulary. quest_ids = [] ans_preds = [] gt_labels = [] print('Running inference on split %s...' % TEST_SET) for i in range(reader.num_batches): if i % 10 == 0: update_progress(i / float(reader.num_batches)) outputs = reader.get_test_batch() generated_ans = sess.run( prob, feed_dict=model.fill_feed_dict(outputs[:-2])) _gt_labels = outputs[1] gt_labels.append(_gt_labels) ans_preds.append(generated_ans) quest_id = outputs[-2] quest_ids.append(quest_id) ans_preds = np.concatenate(ans_preds) gt_labels = np.concatenate(gt_labels) return evaluate_result(ans_preds, gt_labels)
def vaq_condition(checkpoint_path=None): subset = 'dev' model_config = ModelConfig() # Get model model_fn = get_model_creation_fn(FLAGS.model_type) # build data reader reader = Reader(batch_size=1, subset=subset, output_attr=True, output_im=False, output_qa=True, output_capt=False) if checkpoint_path is None: ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir % FLAGS.model_type) checkpoint_path = ckpt.model_checkpoint_path g = tf.Graph() with g.as_default(): # Build the model. model = model_fn(model_config, 'condition') model.build() saver = tf.train.Saver() sess = tf.Session() tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path)) saver.restore(sess, checkpoint_path) fetch_op = model.losses num_batches = reader.num_batches save_file = 'data/%s_vaq_cond_score1000-2000_%s.hdf5' % ((FLAGS.model_type).lower(), subset) print('Save File: %s' % save_file) print('Running conditioning...') nlls, quest_ids = [], [] for i in range(num_batches): update_progress(i / float(num_batches)) outputs = reader.get_test_batch() im_feed, quest, _, ans_feed, quest_id, image_id = outputs losses = sess.run(fetch_op, feed_dict=model.fill_feed_dict(outputs[:-2])) scores = losses[:, :-1].mean(axis=1) scores = scores[np.newaxis, ::] nlls.append(scores) quest_ids.append(quest_id) nlls = np.concatenate(nlls, axis=0) quest_ids = np.concatenate(quest_ids, axis=0) print('\nSaving result files: %s...' % save_file) save_hdf5(save_file, {'nll': nlls, 'quest_ids': quest_ids})
def test_model(ab_path='../data/alpha_beta/training.npz'): print 'starting training' alphas, betas = beta_util.load_ab(filename=ab_path) for q in range(10001, 10101): print 'Starting runId ' + str(q) runid = q revenue = 0 for i in range(0, 10001): page_index = beta_util.draw_from_beta_distributions( alphas=alphas, betas=betas, possible_pages=possible_pages) # which arm wins bla = 0 while bla < 50: try: response = responder.respond_with_page( i=i, runid=runid, page=possible_pages[page_index], teampw=teampw) except Exception: bla = bla + 1 continue break success = response['effect']['Success'] # 1 or 0 revenue = revenue + success * possible_pages[page_index]['price'] alphas, betas = beta_util.update_alphas_betas( index=page_index, success=success, price=possible_pages[page_index]['price'], alphas=alphas, betas=betas) util.update_progress(i / 10001.0) util.save_profit(revenue) beta_util.save_ab(alphas, betas) util.update_progress(1) print 'RunId ' + str(q) + ' of ' + str(10100) + ' finished.' beta_util.save_ab_to_filename(alphas, betas, name='testing') print 'training complete'
def ws_backoff(func): def backoff_func(*args, **kwargs): global lastwsquery try: return func(*args,**kwargs) except ws.WebServiceError,e: if (e.msg.find("503") != -1): util.update_progress("Caught " +webservice+ " 503, waiting 20s and trying again...") else: # A bare raise will reraise the current exception raise except ws.ConnectionError,e: if (e.msg.find("urlopen error timed out") != -1): util.update_progress("Caught " +webservice+ " urlopen timeout. Retrying...") else: raise
def pipeline(self, centroids, data_file="../data/preprocessed.h5", file_path="../data/activations/", batch_size=-1, n_pool_regions=4): if not os.path.exists(file_path): os.makedirs(file_path) if batch_size == -1: meta = util.load_metadata() batch_size = meta['patches_per_image'] batches = batchreader.BatchReader(batchsize=batch_size, filepath=data_file) dimensions = (batches.nbatches, len(centroids) * n_pool_regions) # Set dimensions to #images x 4*#centroids activations = np.zeros(dimensions) for i, batch in enumerate(batches): activation = self.distance_to_centroids( batch, centroids ) # Calculate activations for each patch to each centroid pooled = pool(activation, n_pool_regions=n_pool_regions ) # Returns a vector with length 4x#centroids activations[i] = pooled util.update_progress(i / float(batches.nbatches)) util.update_progress(1) print "Normalizing activations..." activations = self.normalize(activations) print "Normalizing done" print "Writing activations to file:" f = h5py.File(file_path + str(len(centroids)) + "activationkmeans.h5", "w") dataSet = f.create_dataset("activations", dimensions, dtype=np.float64) dataSet[...] = activations f.close() print "Writing done" return activations
def trainLinearChainCRF(dataset, featureFunction, iters=10, dev_set=[]): """ Given |dataset|, do stochastic gradient descent to obtain a parameter vector. @param dataset list (list string, list string) - A collection of labeled sequences. """ stepSize = 0.9 # Get all viable tags TAGS = list(set(it.chain.from_iterable(ys for _, ys in dataset))) # Initialize with a simple CRF with 0 parameters. crf = LinearChainCRF(TAGS, featureFunction) timer = util.Timer() for i in xrange(iters): gradientCheck(crf, dataset[0][0], dataset[0][1]) # Print status lhood = sum(computeLogProbability(crf, xs, ys) for xs, ys in dataset) / len(dataset) print "Training set confusion matrix:" f1 = reportF1(dataset, crf) print "Development set confusion matrix:" dev_f1 = reportF1(dev_set, crf) if dev_set else 0. print 'Iter %d, Likelihood %0.3f, Train F1: %0.3f, Dev F1 %0.3f' % ( i, lhood, f1, dev_f1) timer.start() for (j, (xs, ys)) in enumerate(dataset): gradient = computeGradient(crf, xs, ys) for key, value in gradient.iteritems(): crf.parameters[key] += stepSize * value util.update_progress(float(j) / len(dataset)) util.update_progress(1.0) stepSize *= (1. + i) / (2. + i) print 'Iter %d took %0.2f seconds' % (i, timer.ticks()) lhood = sum(computeLogProbability(crf, xs, ys) for xs, ys in dataset) / len(dataset) f1 = reportF1(dataset, crf) dev_f1 = reportF1(dev_set, crf) if dev_set else 0. print 'Iter %d, Likelihood %0.3f, Train F1: %0.3f, Dev F1 %0.3f' % ( i, lhood, f1, dev_f1) return crf
def _ensure_16bit_wave(filename): """ Check the 'width' of a given WAVE file and ensure it is 16-bit.""" wav = wave.open(filename, 'rb') width = wav.getsampwidth() wav.close() if width != 2: newfile = filename + ".16bit.wav" try: util.update_progress("Forcing WAV file to 16 bit pcm") args = ["sndfile-convert", "-pcm16", filename, newfile] ret = subprocess.call(args) if ret != 0: raise DecodeFailed(filename, "Subprocess returned %d" % ret) if os.path.exists(filename): os.unlink(filename) os.rename(newfile, filename) except OSError,e: raise Exception("Only 16-bit sample widths are supported unless libsndfile is installed") finally:
def _decode(fromname, towavname): if fromname.lower().endswith(".mp3"): args = ["mpg123", "--quiet", "--wav", towavname, fromname] elif fromname.lower().endswith(".flac"): args = [ "flac", "-d", "--totally-silent", "-f", "-o", towavname, fromname ] elif fromname.lower().endswith(".ogg"): args = ["oggdec", "--quiet", "-o", towavname, fromname] else: raise DecodeFailed(fromname, "Don't know how to decode filename") try: util.update_progress("Decoding file") ret = subprocess.call(args) if ret != 0: raise DecodeFailed(fromname, "Subprocess returned %d" % ret) except OSError, e: raise DecodeFailed(fromname, "Cannot find decoder %s" % args[0])
def hist_eq(image_dir='test_hist/', target_dir='test_result_hist/', method='CLAHE'): if not os.path.exists(target_dir): os.makedirs(target_dir) tasks = glob.glob(image_dir + '*.jpeg') job_total = len(tasks) print 'Processing images matching ' + image_dir + '*.jpeg' jobs = Queue() result = JoinableQueue() NUMBER_OF_PROCESSES = cpu_count() * 2 for im_name in tasks: jobs.put(im_name) for i in xrange(NUMBER_OF_PROCESSES): p = Thread(target=worker, args=(i, jobs, result, target_dir, method)) p.daemon = True p.start() print 'Starting workers (', NUMBER_OF_PROCESSES, ')!' n_complete = 0 for t in xrange(len(tasks)): r = result.get() n_complete += 1 util.update_progress(n_complete / float(job_total)) result.task_done() #print t, 'done' for w in xrange(NUMBER_OF_PROCESSES): jobs.put(None) print 'Done!' result.join() jobs.close() result.close()
def test(checkpoint_path=None): batch_size = 128 config = ModelConfig() # Get model function model_fn = get_model_creation_fn(FLAGS.model_type) # build data reader reader = TestReader(batch_size=batch_size, subset=TEST_SET) if checkpoint_path is None: ckpt = tf.train.get_checkpoint_state( FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type, FLAGS.delta)) checkpoint_path = ckpt.model_checkpoint_path print(checkpoint_path) # build and restore model model = model_fn(config, phase='test') model.build() sess = tf.Session(graph=tf.get_default_graph()) tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path)) saver = tf.train.Saver() saver.restore(sess, checkpoint_path) print('Running inference on split %s...' % TEST_SET) aug_quest_ids, scores = [], [] for i in range(reader.num_batches): if i % 10 == 0: update_progress(i / float(reader.num_batches)) outputs = reader.get_test_batch() rank_score = sess.run(model.prob, feed_dict=model.fill_feed_dict(outputs[:3])) _, quest_ids, image_ids = outputs[3:] scores.append(rank_score) aug_quest_ids.append(quest_ids) aug_quest_ids = np.concatenate(aug_quest_ids) scores = np.concatenate(scores) return convert_to_questions(aug_quest_ids, scores)
def delay(*args,**kwargs): global lastwsquery if webservice not in lastwsquery: lastwsquery[webservice]=startup lastwsquery[webservice] = max( lastwsquery[webservice], time.time() - webservices[webservice]["freequeries"] * MINDELAY) wait=0 if time.time()-lastwsquery[webservice]<MINDELAY: wait=MINDELAY-(time.time()-lastwsquery[webservice]) if PROFILE==1: util.update_progress("Waiting %.2fs for %s" % (wait,func.__name__)) time.sleep(wait) t=time.time() ret=func(*args,**kwargs) if PROFILE>=2: util.update_progress("%s took %.2fs (after a %.2fs wait)" % (func.__name__,time.time()-t,wait)) lastwsquery[webservice]+=MINDELAY return ret
def hist_eq(image_dir = 'test_hist/', target_dir = 'test_result_hist/', method = 'CLAHE'): if not os.path.exists(target_dir): os.makedirs(target_dir) #pic_list = os.listdir(image_dir) pic_list = glob.glob(image_dir+'/*.jpeg') list_length = len(pic_list) util.update_progress(0) for j, image_path in enumerate(pic_list): img = cv2.imread(image_path,1) # Use file name only, without .jpeg image_name = image_path.split('/')[-1][:-5] b,g,r = cv2.split(img) if method == 'HE': cv2.equalizeHist(b,b) cv2.equalizeHist(g,g) cv2.equalizeHist(r,r) else: clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8)) clahe.apply(g,g) if not method =='CLAHE_G': clahe.apply(b,b) clahe.apply(r,r) recombined = cv2.merge((b,g,r)) cv2.imwrite(target_dir + image_name + method +'.jpeg', recombined) util.update_progress(j/float(list_length)) util.update_progress(1)
def test_run(self, startid, endid, starti, endi): n_iter = 0 max_iter = (endid-startid)*(endi - starti) util.update_progress(0) start_time = time.time() for runid in np.arange(startid, endid+1, 1): for i in np.arange(starti, endi+1, 1): context = self.contextGetter.call(i, runid) page, pageindex = self.give_page(context) response = responder.respond_with_page(i=i, runid=runid, page = page, teampw = self.teampw) success = response['effect']['Success'] self.revenue += success*page['price'] self.update_alpha_betas(success, pageindex, context) n_iter += 1 util.update_progress(n_iter/float(max_iter)) if n_iter%10 == 0: util.save_profit(self.revenue) util.update_progress(1) time_elapsed = time.time()-start_time av_time = float(time_elapsed/max_iter) print "Total time: " + str(time_elapsed) + " av_time: " + str(av_time)
def run(iterations = 350000, filename = '../data/alpha_beta/1.npz'): if not os.path.isfile(filename): alphas = np.ones(len(possible_pages)) betas = np.ones(len(possible_pages)) revenue = 0 else: alphas, betas = beta_util.load_ab() revenue = util.load_profit() for i in range(0, iterations): randi = random.randint(0,9000) randrunid = random.randint(0,10000) page_index = beta_util.draw_from_beta_distributions(alphas=alphas, betas=betas, possible_pages = possible_pages) # which arm wins bla = 0 while bla<50: try: response = responder.respond_with_page(i=randi, runid=randrunid, page = possible_pages[page_index], teampw = teampw) except Exception: bla = bla+1 continue break success = response['effect']['Success'] # 1 or 0 alphas, betas = beta_util.update_alphas_betas(index=page_index, success = success, price = possible_pages[page_index]['price'], alphas = alphas, betas = betas) revenue = revenue + success*possible_pages[page_index]['price'] if i%100 == 0: beta_util.save_ab(alphas, betas) util.save_profit(revenue) util.update_progress(i/float(iterations)) util.update_progress(1)
def run(self, iterations = 400000): for i in range(0, iterations): randi = random.randint(0,9900) randrunid = random.randint(0,10000) page, pageindex = self.give_page()# which arm wins response = responder.respond_with_page(i=randi, runid=randrunid, page = page, teampw = self.teampw) success = response['effect']['Success'] # 1 or 0 self.update_alpha_betas(success, pageindex) self.revenue += success*page['price'] if i%10 == 0: self.save_ab() # util.save_profit(self.revenue) util.update_progress(i/float(iterations)) util.update_progress(1)
def clear_area_around_eye(size = 256, image_dir = 'I:/AI_for_an_eyes/test/test/', target_dir = 'I:/AI_for_an_eyes/test/test_zonder_meuk_256/'): if not os.path.exists(target_dir): os.makedirs(target_dir) pic_list = os.listdir(image_dir) list_length = len(pic_list) util.update_progress(0) for j, image_name in enumerate(pic_list): img = cv2.imread(image_dir + image_name,1) cimg = copy.copy(img) height, width = img.shape[:2] helpert = height/size small_height = height/helpert small_width = width/helpert small_img = cv2.resize(img, (int(small_width),int(small_height))) ret,thresh = cv2.threshold(small_img, 10, 150, cv2.THRESH_BINARY) gray = cv2.cvtColor(thresh, cv2.COLOR_BGR2GRAY) gray = cv2.medianBlur(gray,5) #find circles about the size of image height circles = cv2.HoughCircles(gray,cv2.cv.CV_HOUGH_GRADIENT,1,20, param1=100,param2=20,minRadius=int(small_height/2.05),maxRadius=int(small_height/2)+int(small_height*0.03)) if circles is None: #find circles larger than image height circles = cv2.HoughCircles(gray,cv2.cv.CV_HOUGH_GRADIENT,1,20, param1=100,param2=20,minRadius=int(small_height/2),maxRadius=int(small_height/2)+int(small_height*0.15)) if circles is None: #find circles smaller than image height circles = cv2.HoughCircles(gray,cv2.cv.CV_HOUGH_GRADIENT,1,20, param1=100,param2=20,minRadius=int(small_height/2.25),maxRadius=int(small_height/2)-int(small_height*0.02)) if not circles is None: circles = np.uint16(np.around(circles)) rad=0.0 for i in circles[0,:]: if i[2]> rad: rad = i[2] circle = i circle_init = np.zeros(shape = cimg.shape, dtype = cimg.dtype) cv2.circle(circle_init, (int((circle[0]/small_width)*width), int((circle[1]/small_height)*height)), int(circle[2]*helpert), (255,255,255), -1) cimg= cv2.bitwise_and(cimg, circle_init) cv2.imwrite(target_dir + image_name, cimg) util.update_progress(j/list_length) util.update_progress(1)
def read_test_csv(file_path = '../data/test.csv'): f = open(file_path) try: reader = csv.reader(f) reader.next() util.update_progress(0) for i, row in enumerate(reader): image = np.array(row) save_as_image(image, "testset",i , file_path = '../data/') util.update_progress(i/28000.0) finally: f.close() util.update_progress(1)
def store(runid, file_path = '../context/', i = 0): if not os.path.exists(file_path): os.makedirs(file_path) contextarray = [] getter = context_getter.ContextGetter(runid) print 'getting context of run: ' + str(runid) util.update_progress(0) for i, context in enumerate(getter): contextarray.append(context) util.update_progress(i/float(getter.max_calls)) util.update_progress(1) print 'writing context to json' with open(file_path + str(runid), 'w+') as contextfile: json.dump(contextarray, contextfile)
def clear_area_around_eye(size = 256, image_dir = 'I:/AI_for_an_eyes/test/test/', target_dir = 'I:/AI_for_an_eyes/test/test_zonder_meuk_256/'): if not os.path.exists(target_dir): os.makedirs(target_dir) util.update_progress(0) tasks = glob.glob(image_dir+'*.jpeg') job_total = len(tasks) print 'Processing images matching ' + image_dir+ '*.jpeg' jobs = Queue() result = JoinableQueue() NUMBER_OF_PROCESSES = cpu_count()*2 for im_name in tasks: jobs.put(im_name) for i in xrange(NUMBER_OF_PROCESSES): p = Thread(target=worker, args=(i, jobs, result, target_dir, size)) p.daemon = True p.start() print 'Starting workers (', NUMBER_OF_PROCESSES, ')!' n_complete = 0 for t in xrange(len(tasks)): r = result.get() n_complete += 1 util.update_progress(n_complete/float(job_total)) result.task_done() #print t, 'done' for w in xrange(NUMBER_OF_PROCESSES): jobs.put(None) util.update_progress(1) print 'Done!' time.sleep(1) result.join() jobs.close() result.close()
# This is a very broad error so it should go last (other errors inherit from IOError) except IOError,e: #url lib converts socket errors to cryptic IOErrors for some strange reason if e.errno == "socket error" and e.strerror.args[0] == "timed out": print ("Caught " +webservice+ " IO timeout") else: # A bare raise will reraise the current exception raise #bail if we've made enough attempts if i >= WSATTEMPTS: break else: i+=1 util.update_progress("waiting 20 seconds and trying again...") time.sleep(20) util.update_progress("20 second backoff is over. Retrying now...") # Reset the timer delayed uses so that we don't # end up with a bunch of queries causing # another 503 lastwsquery[webservice]=time.time() print "Giving up on call to %s after %d tries." % (webservice, WSATTEMPTS) raise backoff_func.__name__=func.__name__ return backoff_func return ws_backoff def delayed(webservice="default"):
productids = np.arange(10, 26, 1) prices = [10,15,20,25,30,35,40] pass_file = '../password.pass' f = open(pass_file, 'rb') teampw = f.next() # number of samples to train on iterations = 10 # create the pages possible_pages = beta_util.create_possible_pages(headers=headers, adtypes=adtypes, colors=colors, productids=productids, prices=prices) util.update_progress(0) def run(iterations = 350000, filename = '../data/alpha_beta/1.npz'): if not os.path.isfile(filename): alphas = np.ones(len(possible_pages)) betas = np.ones(len(possible_pages)) revenue = 0 else: alphas, betas = beta_util.load_ab()
except ErrorResponse, e: abort(401) account = client.account_info() session['username'] = account['display_name'] quota = float(account['quota_info']['quota']) shared = float(account['quota_info']['shared']) normal = float(account['quota_info']['normal']) total_bytes = int(normal + shared) session['used'] = human_readable(normal + shared) session['quota'] = human_readable(quota) job = q.enqueue(walk, client, get_metadata(client, '/'), 0, total_bytes) job.meta['access_token'] = session['access_token']; job.save() update_progress(job, 0, "/") session['job'] = job.key return render_template('display.html', username=session['username'], quota=session['quota'], used=session['used']) @app.route('/display_result') def display_result(): if 'job' not in session: return jsonify(ready=False, progress=0) job = get_job_from_key(session['job'], conn) if job is None: abort(400) if job.result is None: return jsonify(ready=False, current=job.meta['current'], progress=job.meta['progress'])
def guess_album2(trackinfo): # trackinfo is # <fname> => <musicfile.MusicFile> # # returns a list of possible release id's # # This version works by trying a breadth first search of releases to try # and avoid wasting a lot of time finding releases which are going to # be ignored. # # This function returns a list of release id's possible_releases={} impossible_releases=[] track_generator={} completed_releases=[] start_time = time.time() if trackinfo=={}: print "No tracks to identify?" return for (fileid,file) in trackinfo.iteritems(): track_generator[fileid]=itertools.chain( file.getTracks(), strat_musicbrainzid.generate_from_metadata(file), strat_transitive.generate_track_puid_possibilities( file.getTracks()), strat_metadata.generate_from_metadata( file, len(trackinfo)), strat_trackname.generate_track_name_possibilities( file, fileid, possible_releases) ) while track_generator!={}: timelimit = albumidentifyconfig.config.getint("albumidentify","timelimit") if timelimit > 0 and time.time() - start_time > timelimit: print "TIMEOUT EXCEEDED, GIVING UP" break fileid = choose_track( possible_releases, track_generator, trackinfo) try: track = track_generator[fileid].next() except StopIteration: end_of_track( possible_releases, impossible_releases, track_generator, trackinfo, fileid) # If we have no more possible releases for the track # we're giving up on, we can't # get any more. # So give up now. if possible_releases == {}: return continue for releaseid in (x.id for x in track.releases): # Skip releases we've already seen before. if releaseid in impossible_releases: continue util.update_progress("Considering %s" % ( musicbrainz2.utils.extractUuid(releaseid))) try: release = lookups.get_release_by_releaseid(releaseid) except Exception,e: util.report("WARNING: Unexpected exception looking for "+musicbrainz2.utils.extractUuid(releaseid)+": "+str(e)) continue # Is the track usable? if not verify_track(release, possible_releases, impossible_releases, trackinfo, fileid, track): continue add_new_track(release, possible_releases, fileid, track, trackinfo, impossible_releases) if len(possible_releases[releaseid])==len(trackinfo) \ and releaseid not in completed_releases: print release.title,"seems ok\x1b[K" print "Musicbrainz Release Id: %s.html" % release.id yield releaseid, possible_releases[releaseid] completed_releases.append(releaseid)
# This is a very broad error so it should go last (other errors inherit from IOError) except IOError,e: #url lib converts socket errors to cryptic IOErrors for some strange reason if e.errno == "socket error" and e.strerror.args[0] == "timed out": print ("Caught " +webservice+ " IO timeout") else: # A bare raise will reraise the current exception raise #bail if we've made enough attempts if i >= attempts: break else: i+=1 util.update_progress("waiting %d seconds and trying again..." % cooldown) time.sleep(cooldown) util.update_progress("%d second backoff is over. Retrying now..." % cooldown) # Reset the timer delayed uses so that we don't # end up with a bunch of queries causing # another 503 lastwsquery[webservice]=time.time() print "Giving up on call to %s after %d tries." % (webservice, attempts) raise backoff_func.__name__=func.__name__ return backoff_func return ws_backoff def delayed(webservice="default"):
def verify_track(release, possible_releases, impossible_releases, trackinfo, fileid, track): # Step One: Has this file already been found on this release? releaseid = release.id if releaseid in possible_releases and fileid in possible_releases[releaseid].values(): util.update_progress("Already found on this release:" + fileid ) return False # Step Two: Check for the right number of tracks if len(release.tracks) != len(trackinfo): # Ignore release -- wrong number of tracks util.update_progress(release.title.encode("ascii","ignore")[:40]+": wrong number of tracks (%d not %d)" % (len(release.tracks),len(trackinfo))) impossible_releases.append(releaseid) return False # Step Three: Have we found a file for this track on this release? tracknum = lookups.track_number(release.tracks, track) if releaseid in possible_releases and tracknum in possible_releases[releaseid]: util.update_progress("Already found a file for track %02d: %s" % (tracknum,possible_releases[releaseid][tracknum])) return False # Step Four: (optionally) Check that track 'n' maps to file 'n'. if FORCE_ORDER: found_tracknumber=lookups.track_number(release.tracks, track) file_ids = trackinfo.keys() file_ids = sort.sorted_list(file_ids) if found_tracknumber != file_ids.index(fileid)+1: util.update_progress(release.title[:40]+": track at wrong position") return False # Step Five: Make sure if there is another mapping on this album # that we don't accept this one. if trackinfo[fileid].getPUID() in get_puids_for_release(releaseid): if trackinfo[fileid].getPUID() not in lookups.get_track_by_id(track.id).puids: print "Track exists elsewhere on this release" print "",fileid print "",track.title for ntrackind,ntrack in enumerate(release.tracks): ntrack = lookups.get_track_by_id(ntrack.id) if trackinfo[fileid].getPUID() in ntrack.puids: print " should be:",ntrack.title return False # Step Six: Make sure the song is within 10% of the length of the # track we expect it to be. track = lookups.get_track_by_id(track.id) if track.getDuration() is not None: dur_ratio = track.getDuration() * 1.0 / trackinfo[fileid].getDuration() if dur_ratio < .9 or dur_ratio > 1.1: print "Track lengths differ" print " (%s) %s" % ( duration_to_string(trackinfo[fileid].getDuration()), trackinfo[fileid].getFilename(), ) print " (%s) %s" % ( duration_to_string(track.getDuration()), track.title, ) return False # Well, after passing through that gauntlet, we might consider this track! return True
def hash_file(fname): t = time.time() h=hashlib.md5(open(fname,"rb").read()).hexdigest() util.update_progress("Hashed file (%fs)" % (time.time()-t)) return h
def populate_fingerprint_cache(fname): util.update_progress("Looking up fingerprint for "+os.path.basename(fname)) return fingerprint_any(fname)
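# Every snippet in this listing reports status through an update_progress helper that is not itself shown here (the Dropbox snippets use a separate, job-based update_progress(job, progress, path)). The sketch below is purely a hypothetical illustration of the single-argument util.update_progress variant, assuming it accepts either a completion fraction in [0, 1] or a free-form status string and rewrites the current terminal line; it is not the project's actual util module.
import sys

def update_progress(progress, bar_length=40):
    """Hypothetical sketch of a one-line terminal progress helper."""
    if isinstance(progress, str):
        # Free-form status message, e.g. update_progress("Decoding file")
        sys.stdout.write("\r%s\x1b[K" % progress)
    else:
        # Completion fraction, e.g. update_progress(i / float(num_batches))
        fraction = min(max(float(progress), 0.0), 1.0)
        filled = int(round(fraction * bar_length))
        bar = "#" * filled + "-" * (bar_length - filled)
        sys.stdout.write("\r[%s] %3d%%\x1b[K" % (bar, int(fraction * 100)))
    sys.stdout.flush()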