def _push(self, app_id, app_files_dir, assert_no_files=True):
    token, signature = make_development_handshake('push', 'testuser', app_id)
    url = 'http://localhost:8000/v1/push/%s/?handshake_token=%s' \
          '&handshake_signature=%s' % (app_id, token, signature)
    server_hashes = get_hashes(url)
    self.assertTrue(isinstance(server_hashes, dict))
    if assert_no_files:
        self.assertEqual(len(server_hashes), 0)

    # Push our test files
    post_archive(app_files_dir, url, server_hashes)

    # Check it matches
    server_hashes = get_hashes(url)
    self.assertTrue(isinstance(server_hashes, dict))
    self.assertEqual(len(server_hashes), count_files(app_files_dir))
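# The test above assumes a count_files(path) helper that returns the total
# number of files under a directory tree. That helper is not shown in the
# source; this is a minimal sketch of the assumed behavior, not the project's
# actual implementation.
import os

def count_files(path):
    # Walk the tree and count regular files (assumed recursive semantics).
    return sum(len(files) for _, _, files in os.walk(path))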
def log_data_info(data_dir, logger):
    """
    Log the number of samples of each class found in data_dir
    and return the total number of samples.
    """
    type_names = [
        dI for dI in os.listdir(data_dir)
        if os.path.isdir(pjoin(data_dir, dI))
    ]
    total_sample_num = 0
    for cls_name in type_names:
        cls_dir = pjoin(data_dir, cls_name)
        cls_num = utils.count_files(cls_dir, ".jpg")
        logger.info("Number of samples in class {}: {}.".format(cls_name, cls_num))
        total_sample_num += cls_num
    return total_sample_num
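# Hypothetical usage sketch (the path and logger name are assumptions, not
# from the source): log_data_info expects data_dir to contain one
# subdirectory per class, each holding .jpg samples.
import logging

logging.basicConfig(level=logging.INFO)
example_logger = logging.getLogger("data_info")
# total = log_data_info("/path/to/train_data", example_logger)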
data = "wikiart/wikiart/" data_train = r"C:\Users\tgill\OneDrive\Documents\GD_AI\ArtGAN\wikipaintings_full\wikipaintings_train" data_test = r"C:\Users\tgill\OneDrive\Documents\GD_AI\ArtGAN\wikipaintings_full\wikipaintings_val" target_size = (224, 224) X, y, classes = load_samples(10) X_test, y_test, classes_test = load_samples(10, data_test) #m = resnet_trained(20) m = simple_gram(20) print(m.summary()) n_files_train = count_files(data_train) n_files_test = count_files(data_test) nepochs = 20 epoch_size = 2500 batch_size = 32 steps_per_epoch = (n_files_train//batch_size) v_step = 50 #n_files_test//batch_size distortions=0.1 train_paths, y_train, classes = getPaths(data_train) test_paths, y_test, classes = getPaths(data_test) prep_func = imagenet_preprocess_input
def main():
    # parsing command-line options
    global options, db_file, _connect
    sql_dir = os.path.join(os.path.dirname(sys.argv[0]), 'sql')
    option_list = [
        make_option("-o", "--out-dir", dest="out_dir", metavar="DIR",
                    help="save updated fb2 files to this dir"),
        make_option("-g", "--generate-db", dest="update_db",
                    action="store_true", default=False, help="generate db"),
        make_option("-d", "--do-not-delete", dest="nodel",
                    action="store_true", default=False,
                    help="don't delete duplicate files"),
        make_option("-f", "--do-not-fix", dest="nofix",
                    action="store_true", default=False,
                    help="don't fix an xml"),
        make_option("-u", "--do-not-update", dest="noup",
                    action="store_true", default=False,
                    help="don't update fb2 meta info"),
        make_option("-z", "--do-not-zip", dest="nozip",
                    action="store_true", default=False,
                    help="don't zip result files"),
        make_option("-i", "--search-id", dest="search_id",
                    action="store_true", default=False,
                    help="search bookid in fb2"),
        make_option("-a", "--save-deleted", dest="save_deleted", metavar="DIR",
                    help="save deleted fb2 files to this dir"),
        make_option("-c", "--search-deleted", dest="search_deleted",
                    metavar="DIR",
                    help="search deleted fb2 files in this dir"),
        make_option("-b", "--save-bad-fb2", dest="save_bad", metavar="DIR",
                    help="save bad fb2 files to this dir"),
        make_option("-s", "--sql-dir", dest="sql_dir", default=sql_dir,
                    metavar="DIR", help="search sql files in this dir"),
        make_option("-e", "--output-encoding", dest="output_encoding",
                    default='utf-8', metavar="ENC",
                    help="fb2 output encoding"),
        make_option("-l", "--log-file", dest="log_file", metavar="FILE",
                    help="output log to this file"),
        make_option("-n", "--not-found-file", dest="not_found", metavar="FILE",
                    help="save missing books to this file"),
        make_option("-F", "--filename-pattern", dest="fn_format",
                    metavar="PATTERN", help="output filenames pattern"),
    ]
    parser = OptionParser(option_list=option_list,
                          usage=("usage: %prog [options] "
                                 "input-files-or-dirs"),
                          version="%prog " + prog_version)
    options, args = parser.parse_args()
    LogOptions.level = 0
    db_file = os.path.join(options.sql_dir, 'db.sqlite')
    _connect = sqlite3.connect(db_file)

    if options.update_db:
        # update db
        print_log('start update db')
        ret = update_db()
        if ret:
            print_log('done')
        else:
            print_log('fail')
        return

    if len(args) == 0:
        return
        # if len(args) == 0: sys.exit('wrong num args')
    in_file = args[0]
    if not options.out_dir:
        sys.exit('option --out-dir required')
    for f in args:
        if not os.path.exists(f):
            sys.exit('file does not exist: ' + f)
    if not os.path.isdir(options.out_dir):
        sys.exit('dir does not exist: ' + options.out_dir)
    if options.save_bad and not os.path.isdir(options.save_bad):
        sys.exit('dir does not exist: ' + options.save_bad)
    if options.save_deleted and not os.path.isdir(options.save_deleted):
        sys.exit('dir does not exist: ' + options.save_deleted)

    if not os.path.exists(db_file):
        print_log('start update db')
        ret = update_db()
        if ret:
            print_log('done')
        else:
            print_log('fail')
            return

    stats.total_files = count_files(args)
    print 'total files:', stats.total_files
    if options.log_file:
        LogOptions.outfile = open(options.log_file, 'w')
    stats.starttime = time.time()
    process(args)
    et = time.time() - stats.starttime
    print 'elapsed time: %.2f secs' % et
def build_mel_3seconds_groups_dataset():
    conn = MyConn()
    config = Config()
    tracks = [
        r[0] for r in conn.query(table="sub_tracks",
                                 targets=["track_id"],
                                 conditions={"is_valid": 1})
    ]
    save_dir_prefix = "/Volumes/nmusic/NetEase2020/data/mel_3seconds_groups"
    n_dir, dir_size = 1, 100

    # Tracks whose features have already been extracted.
    flag, saved_files = count_files(save_dir_prefix, return_files=True)
    saved_tracks = [x[:-4] for x in saved_files]

    q_tracks = Queue()
    for t in tracks:
        if t not in saved_tracks:
            q_tracks.put(t)
    print(q_tracks.qsize())
    lock = Lock()

    def task(thread_id, task_args):
        conn = MyConn()
        while not q_tracks.empty():
            try:
                tid = q_tracks.get()
                lock.acquire()
                dirpath = assign_dir(prefix=save_dir_prefix,
                                     flag=task_args["flag"],
                                     n_dir=n_dir,
                                     dir_size=dir_size)
                if not os.path.exists(dirpath):
                    os.makedirs(dirpath)
                filepath = os.path.join(dirpath, "{}.pkl".format(tid))
                task_args["flag"] += 1
                lock.release()

                # Fetch the track's mp3_path and chorus_start from the database.
                mp3_path, chorus_start = conn.query(
                    table="sub_tracks",
                    targets=["mp3_path", "chorus_start"],
                    conditions={"track_id": tid},
                    fetchall=False)
                music_vec = get_mel_3seconds_groups(mp3_path, config,
                                                    offset=chorus_start,
                                                    duration=18)
                with open(filepath, 'wb') as f:
                    pickle.dump(music_vec, f)
            except KeyboardInterrupt:
                print("KeyboardInterrupt. q_tracks size: {}".format(
                    q_tracks.qsize()))
                break
            except:
                print(tid)
                print(traceback.format_exc())
                sys.exit(0)

    task_args = {}
    task_args["flag"] = flag
    threads_group = ThreadsGroup(task=task, n_thread=5, task_args=task_args)
    threads_group.start()
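# assign_dir is not shown in the source. Judging from how it is called
# (a running `flag` counter, n_dir=1, dir_size=100), it appears to bucket
# output files into numbered subdirectories of dir_size files each. This is a
# hypothetical sketch of that assumed behavior, not the project's real helper.
import os

def assign_dir(prefix, flag, n_dir=1, dir_size=100):
    # Bucket index advances every dir_size files (assumption).
    bucket = n_dir + flag // dir_size
    return os.path.join(prefix, str(bucket))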
def build_vggish_embed_dataset():
    conn = MyConn()
    sql = "SELECT track_id FROM sub_tracks WHERE is_valid=1 AND vggish_embed_path IS NULL"
    tracks = [r[0] for r in conn.query(sql=sql)]
    save_dir_prefix = "/Volumes/nmusic/NetEase2020/data/vggish_embed"
    n_dir, dir_size = 1, 100

    # Tracks whose embeddings have already been extracted.
    flag, saved_files = count_files(save_dir_prefix, return_files=True)
    saved_tracks = [x[:-4] for x in saved_files]

    q_tracks = Queue()
    vggish = torch.hub.load("harritaylor/torchvggish", "vggish",
                            pretrained=True)
    for t in tracks:
        if t not in saved_tracks:
            q_tracks.put(t)
    print(q_tracks.qsize())
    lock = Lock()

    def task(thread_id, task_args):
        conn = MyConn()
        while not q_tracks.empty():
            try:
                tid = q_tracks.get()
                lock.acquire()
                dirpath = assign_dir(prefix=save_dir_prefix,
                                     flag=task_args["flag"],
                                     n_dir=n_dir,
                                     dir_size=dir_size)
                if not os.path.exists(dirpath):
                    os.makedirs(dirpath)
                filepath = os.path.join(dirpath, "{}.pkl".format(tid))
                task_args["flag"] += 1
                lock.release()

                # Fetch the track's rawmusic_path from the database.
                rawmusic_path = conn.query(table="sub_tracks",
                                           targets=["rawmusic_path"],
                                           conditions={"track_id": tid},
                                           fetchall=False)[0]
                with open(rawmusic_path, "rb") as f:
                    y = pickle.load(f)
                embed = vggish(y, fs=22050)
                with open(filepath, 'wb') as f:
                    pickle.dump(embed, f)
            except KeyboardInterrupt:
                print("KeyboardInterrupt. q_tracks size: {}".format(
                    q_tracks.qsize()))
                break
            except:
                print(tid)
                print(traceback.format_exc())
                sys.exit(0)

    task_args = {}
    task_args["flag"] = flag
    threads_group = ThreadsGroup(task=task, n_thread=5, task_args=task_args)
    threads_group.start()
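# Quick sanity-check sketch (assumption, not from the source): the VGGish
# model produces one 128-dimensional embedding per audio frame, so a pickled
# waveform can be verified with a helper like this before running the full
# threaded job. The function name and path argument are hypothetical; the
# model call mirrors the one used above.
import pickle

def check_vggish_embed(rawmusic_path, model):
    with open(rawmusic_path, "rb") as f:
        y = pickle.load(f)
    embed = model(y, fs=22050)
    print(embed.shape)  # expected: (n_frames, 128)
    return embed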
def extract_chorus_mark_rawmusic():
    '''
    Extract raw audio for each track based on its chorus_start
    (and chorus_end). Runs with multiple threads.
    '''
    conn = MyConn()
    sql = "SELECT track_id FROM sub_tracks WHERE rawmusic_path IS NULL AND valid_bnum=0 AND chorus_start>0"
    tracks = [r[0] for r in conn.query(sql=sql)]
    save_dir_prefix = "/Volumes/nmusic/NetEase2020/data/chorus_mark_rawmusic"
    n_dir, dir_size = 1, 100

    # Tracks whose audio has already been extracted.
    flag, saved_files = count_files(save_dir_prefix, return_files=True)
    saved_tracks = [x[:-4] for x in saved_files]

    q_tracks = Queue()
    for t in tracks:
        if t not in saved_tracks:
            q_tracks.put(t)
    print(q_tracks.qsize())
    lock = Lock()

    def task(thread_id, task_args):
        conn = MyConn()
        while not q_tracks.empty():
            try:
                tid = q_tracks.get()
                lock.acquire()
                dirpath = assign_dir(prefix=save_dir_prefix,
                                     flag=task_args["flag"],
                                     n_dir=n_dir,
                                     dir_size=dir_size)
                if not os.path.exists(dirpath):
                    os.makedirs(dirpath)
                filepath = os.path.join(dirpath, "{}.pkl".format(tid))
                task_args["flag"] += 1
                lock.release()

                # Fetch the track's chorus_start and mp3_path from the database.
                chorus_start, mp3_path = conn.query(
                    table="sub_tracks",
                    targets=["chorus_start", "mp3_path"],
                    conditions={"track_id": tid},
                    fetchall=False)
                # The chorus segment is fixed to 20 seconds.
                y, sr = librosa.load(mp3_path, offset=chorus_start, duration=20)
                with open(filepath, 'wb') as f:
                    pickle.dump(y, f)
            except KeyboardInterrupt:
                print("KeyboardInterrupt. q_tracks size: {}".format(
                    q_tracks.qsize()))
                break
            except:
                print(tid)
                print(traceback.format_exc())
                sys.exit(0)

    task_args = {}
    task_args["flag"] = flag
    threads_group = ThreadsGroup(task=task, n_thread=10, task_args=task_args)
    threads_group.start()
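# Hypothetical single-track helper (assumption, not from the source): it
# reproduces the per-track work of the threaded loop above, which can be
# convenient when debugging one problematic mp3 without the thread pool.
import pickle
import librosa

def extract_one_track(tid, conn, out_path, duration=20):
    chorus_start, mp3_path = conn.query(
        table="sub_tracks",
        targets=["chorus_start", "mp3_path"],
        conditions={"track_id": tid},
        fetchall=False)
    y, sr = librosa.load(mp3_path, offset=chorus_start, duration=duration)
    with open(out_path, "wb") as f:
        pickle.dump(y, f)
    return y, sr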
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--source', type=str,
                        help='Directory containing receipt images to crop')
    parser.add_argument('--destination', type=str,
                        help='Directory to save cropped receipt images')
    parser.add_argument(
        '--scale', type=float, default=0.5,
        help='Factor by which to scale the images when processing for '
             'cropping. 0.5 generally yields best results')
    parser.add_argument(
        '--size', type=int,
        help='Height and width of final cropped image. If set, overrides '
             'individual height and width arguments')
    parser.add_argument('--width', type=int, default=299,
                        help='Width in pixels of final cropped image')
    parser.add_argument('--height', type=int, default=299,
                        help='Height in pixels of final cropped image')
    parser.add_argument('--vclip', type=float, default=0.5,
                        help='Upper percentage of cropped image to utilize.')
    args = parser.parse_args()

    src = args.source
    dst = args.destination
    if not dst or dst == '':
        dst = src + '_cropped'
    clip = args.vclip
    scale = args.scale
    resize = (args.height, args.width)
    if args.size is not None:
        resize = (args.size, args.size)

    print 'Source Directory: ', src
    print 'Destination Directory: ', dst
    print 'Scale Factor: ', scale
    print 'Clip Percentage: ', clip * 100.0
    print 'Output Image Size: {}px x {}px'.format(resize[0], resize[1])
    print

    total_images = count_files(src)
    processed_images = 0
    times = []
    remaining_time = ''
    for filename, image in get_images(src):
        sys.stdout.write('Processed {} of {} images [{}]\r'.format(
            processed_images, total_images, remaining_time))
        sys.stdout.flush()
        t0 = time.time()
        cropped_image = crop_vertical(image, scale=scale, resize=resize,
                                      clip=clip)
        save_image(cropped_image, dst + '/' + filename)
        processed_images += 1
        t1 = time.time()
        tot_time = t1 - t0
        times.append(tot_time)
        # Estimated time left = images remaining * mean seconds per image.
        remaining_time_secs = (total_images - processed_images) * np.mean(times)
        remaining_time = '{} remaining'.format(
            datetime.timedelta(seconds=int(remaining_time_secs)))

    print 'Processed {} images in {} secs (approx {} secs per image)'.format(
        total_images, np.sum(times), np.mean(times))
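# Example invocation (the script name is hypothetical; the flags match the
# argparse definitions above):
#
#   python crop_receipts.py --source ./receipts --destination ./receipts_cropped \
#       --scale 0.5 --size 299 --vclip 0.5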