def gather_projections():
    '''
    Fetch the main page and save it under the date it was last updated.
    At the bottom of the page there's a dropdown offering predictions from
    the past, which we can use later if this data goes away. We then find
    the team links, gather those pages, and save them as well.

    Directory structure: each team gets its own directory, with files named
    by date. This way we can handle different update times per team.
    '''
    page = requests.get(f"{base_url}/{base_extention}")
    tree = html.fromstring(page.content)
    updated_at = tree.xpath('//*[@id="intro"]/div/div[2]/div[1]/p')[0]
    time_info = updated_at.text.split(' ', 1)[1]
    updated_at_time = datetime.datetime.strptime(time_info, "%b. %d, %Y, at %I:%M %p")
    time_string = updated_at_time.strftime('%Y-%m-%d')
    directory = "data/fivethirtyeight/base"
    utils.ensure_directory_exists(directory)
    filename = f'{time_string}.html'
    filepath = f"{directory}/{filename}"
    with open(filepath, 'w') as f:
        f.write(page.text)
    gather_team_pages(tree, time_string, directory)
    return time_string  # returning the date
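# `utils.ensure_directory_exists` is assumed by nearly every snippet here but
# never shown. A minimal sketch of what such a helper presumably looks like
# (an assumption, not the repo's actual implementation):

import os

def ensure_directory_exists(directory):
    # Create the directory (and any missing parents); exist_ok makes
    # repeated calls and concurrent callers harmless.
    os.makedirs(directory, exist_ok=True)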
def main():
    parser = argparse.ArgumentParser('prepare maps splits (+instruments)')
    parser.add_argument('base_dir', help='path to the maps_piano/data folder')
    args = parser.parse_args()

    current_directory = os.getcwd()
    # we change the cwd to 'base_dir', so 'base_dir' is not part
    # of the filename that ends up in the splitfiles;
    # we'll change back once we write the splitfiles
    os.chdir(args.base_dir)
    filenames = collect_all_filenames(synthnames)
    os.chdir(current_directory)

    out_dir = 'splits/maps-isolated-notes'
    utils.ensure_directory_exists(out_dir)

    for synthname, volumes in filenames.items():
        for volume, fns in volumes.items():
            with open(os.path.join(out_dir, '{}_{}'.format(synthname, volume)), 'w') as f:
                write_to_file(f, fns)

    with open(os.path.join(out_dir, 'instruments'), 'w') as f:
        for si, synthname in enumerate(sorted(synthnames)):
            f.write('{},{}\n'.format(synthname, si))
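# The chdir round-trip above keeps 'base_dir' out of the paths written to the
# splitfiles. For reference, the same effect could be sketched without touching
# the working directory via os.path.relpath (hypothetical helper, not part of
# this script):

import os

def to_split_path(absolute_path, base_dir):
    # Path as it should appear in a splitfile: relative to base_dir,
    # so the split stays valid wherever the dataset is checked out.
    return os.path.relpath(absolute_path, start=base_dir)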
def _get_json_filepath_from_date(date):
    '''
    Makes it easier to know where the data is going. rg has both csv
    and json data; this helper handles the json side.
    '''
    season = helpers.season_from_date(date)
    directory = f'{base_directory}/json/{season}'
    utils.ensure_directory_exists(directory)
    return f"{directory}/{date}.json"
def gather_salary_changes_by_date(date):
    print(f'Getting salary differences from SA for {date}')
    page = requests.get(base_url, params={'date': date})
    season = helpers.season_from_date(date)
    directory = f'{base_directory}/{season}/full'
    utils.ensure_directory_exists(directory)
    filename = f'{date}.html'
    filepath = f'{directory}/{filename}'
    with open(filepath, 'w') as f:
        f.write(page.text)
def gather_odds_by_date(date):
    logger.info(f'Gathering odds for {date}')
    url = odds_url % date
    page = requests.get(url)
    season = helpers.season_from_date(date)
    directory = f'{base_directory}/{season}'
    utils.ensure_directory_exists(directory)
    filename = f'{date}.html'
    filepath = f'{directory}/{filename}'
    with open(filepath, 'w') as f:
        f.write(page.text)
    return date
def main():
    parser = argparse.ArgumentParser('prepare maps splits (+instruments)')
    parser.add_argument('base_dir', help='path to the maps_piano/data folder')
    args = parser.parse_args()

    current_directory = os.getcwd()
    # we change the cwd to 'base_dir', so 'base_dir' is not part
    # of the filename that ends up in the splitfiles;
    # we'll change back once we write the splitfiles
    os.chdir(args.base_dir)
    train_pids = collect_all_piece_ids(train_synthnames)
    test_pids = collect_all_piece_ids(test_synthnames)
    print('len(train_pids)', len(train_pids))
    print('len(test_pids)', len(test_pids))

    train_filenames = collect_all_filenames(train_synthnames, train_pids - test_pids)
    test_filenames = collect_all_filenames(test_synthnames, test_pids)

    # this just selects the first from each synth as a 'validation' set
    valid_filenames = []
    for synthname_a in sorted(train_synthnames):
        for filename, synthname_b in sorted(train_filenames):
            if synthname_a == synthname_b:
                valid_filenames.append((filename, synthname_a))
                break

    print('len(train_filenames)', len(train_filenames))
    print('len(valid_filenames)', len(valid_filenames))
    print('len(test_filenames)', len(test_filenames))

    os.chdir(current_directory)

    out_dir = 'splits/maps-non-overlapping'
    utils.ensure_directory_exists(out_dir)

    with open(os.path.join(out_dir, 'train'), 'w') as f:
        write_to_file(f, train_filenames)
    with open(os.path.join(out_dir, 'valid'), 'w') as f:
        write_to_file(f, valid_filenames)
    with open(os.path.join(out_dir, 'test'), 'w') as f:
        write_to_file(f, test_filenames)

    with open(os.path.join(out_dir, 'instruments'), 'w') as f:
        all_synthnames = train_synthnames | test_synthnames
        for si, synthname in enumerate(sorted(all_synthnames)):
            f.write('{},{}\n'.format(synthname, si))
def gather_games_for_season(season):
    '''
    Gather games by season, e.g. '18-19', '19-20'. The year in the
    basketball-reference url refers to the year the finals are played,
    so we derive it from the end of the season string; the directories
    keep the season name to make that clear.
    '''
    _, end_year = season.split('-')
    year = int(f'20{end_year}')
    directory = f"{games_base_directory}/{season}"
    utils.ensure_directory_exists(directory)
    for month_num in range(1, 13):
        month_name = calendar.month_name[month_num].lower()
        schedule_url = f"https://www.basketball-reference.com/leagues/NBA_{year}_games-{month_name}.html"
        logger.debug(schedule_url)
        page = requests.get(schedule_url)
        filepath = f"{directory}/{month_num:02}.html"
        with open(filepath, 'w') as f:
            f.write(page.text)
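# `helpers.season_from_date` appears throughout these gatherers but isn't
# shown. A minimal sketch consistent with the convention described above,
# assuming dates from August onward belong to the season starting that year
# (the cutoff month is a guess, not the repo's actual code):

import datetime

def season_from_date(date):
    d = datetime.datetime.strptime(date, '%Y-%m-%d')
    start_year = d.year if d.month >= 8 else d.year - 1
    # '2019-01-15' -> '18-19'; '2018-11-01' -> '18-19'
    return f"{start_year % 100:02}-{(start_year + 1) % 100:02}"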
def main(config):
    # state alignments adjustment
    phone_align_folder = config.singing_phone_align_folder
    ending_silence = config.singing_ending_silence
    state_align_original_folder = config.singing_state_align_folder
    phone_name_pattern = config.singing_phone_name_pattern
    state_align_folder = config.merlin_state_align_folder

    utils.log_heading(0, 'adjusting state alignments')
    utils.ensure_directory_exists(state_align_folder)
    file_lengths = adjust_alignments(phone_align_folder, ending_silence,
                                     state_align_original_folder,
                                     phone_name_pattern, state_align_folder)

    # lf0 file generation
    source_f0_folder = config.singing_f0_folder
    target_lf0_folder = config.merlin_lf0_folder

    utils.log_heading(0, 'generating lf0 files')
    utils.ensure_directory_exists(target_lf0_folder)
    generate_lf0(source_f0_folder, target_lf0_folder, file_lengths)
def export_package(index_file, outdir, unpack=False):
    if os.path.exists(outdir):
        print('Target directory already exists')
        exit(1)
    fl = FileList()
    with open(index_file, 'rb') as f:
        fl.ParseFromString(f.read())
    assets_dir = os.path.dirname(index_file)
    for f in fl.files:
        p = get_real_path(f)
        print('copy file', p)
        if unpack:
            target = os.path.join(outdir, f.url)
        else:
            target = os.path.join(outdir, p)
        ensure_directory_exists(target)
        shutil.copyfile(os.path.join(assets_dir, p), target)
    shutil.copyfile(index_file, os.path.join(outdir, 'filelist'))
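# Note that `ensure_directory_exists` above is handed the target *file* path,
# so this script's helper presumably creates the parent directory rather than
# the path itself. A sketch under that assumption:

import os

def ensure_directory_exists(filepath):
    # Assumed behavior: make sure the directory containing 'filepath'
    # exists, so shutil.copyfile can then write the file itself.
    directory = os.path.dirname(filepath)
    if directory:
        os.makedirs(directory, exist_ok=True)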
def gather_team_pages(tree, time_string, directory):
    '''
    We can get team pages from the links in a dropdown of the main
    page that we've already saved.
    '''
    links = tree.xpath('//*[@id="standings-table"]/tbody//a/@href')
    for link in links:
        team_url = f"{base_url}{link}"
        logger.debug(f'url for not yet known team: {team_url}')
        page = requests.get(team_url)
        # find the team from the db so we have the abbrv
        # (parsed into its own tree so we don't clobber the main-page tree)
        team_tree = html.fromstring(page.content)
        team_name = team_tree.xpath(
            '//*[@id="team"]/div/div[1]/h1/span[1]/text()')[0]
        logger.debug(f'Found Team: {team_name}')
        team = actor.find_team_by_name(team_name)
        directory = f"{base_directory}/{team['abbrv']}"
        utils.ensure_directory_exists(directory)
        filename = f"{time_string}.html"
        filepath = f"{directory}/{filename}"
        logger.info(f"Saving {team['name']} FTE html to {filepath}")
        with open(filepath, 'w') as f:
            f.write(page.text)
import sys
sys.path.append('../12net')
import numpy as np
import cv2
import os
import numpy.random as npr
from utils import IoU
from utils import ensure_directory_exists

anno_file = "wider_face_train.txt"
im_dir = "WIDER_train/images"
pos_save_dir = "../48net/48/positive"
part_save_dir = "../48net/48/part"
neg_save_dir = '../48net/48/negative'
save_dir = "../48net/48"

ensure_directory_exists(save_dir)
ensure_directory_exists(pos_save_dir)
ensure_directory_exists(neg_save_dir)
ensure_directory_exists(part_save_dir)

f1 = open(os.path.join(save_dir, 'pos_48.txt'), 'w')
f2 = open(os.path.join(save_dir, 'neg_48.txt'), 'w')
f3 = open(os.path.join(save_dir, 'part_48.txt'), 'w')

with open(anno_file, 'r') as f:
    annotations = f.readlines()
num = len(annotations)
print("%d pics in total" % num)

p_idx = 0  # positive
n_idx = 0  # negative
d_idx = 0  # don't care
idx = 0
import numpy as np
import cv2
import os
import numpy.random as npr
from utils import IoU, rotate_images
from utils import ensure_directory_exists

# Each annotation row: image_name, cls_label, face_up_label, bbox
# cls_label in [-1, 0, 1]: 1 positive; 0 negative; -1 part (does not contribute)
# face_up_label in [-1, 0, 1]: 1 up; 0 down; -1 does not contribute

IMAGE_SIZE = 24
DEBUG = False
if DEBUG:
    target_image_dir = "plot_images"
    ensure_directory_exists(target_image_dir)
    negative_image_dir = "negative_plot_images"
    ensure_directory_exists(negative_image_dir)

anno_file = "wider_face_train.txt"
im_dir = "/media/disk1/mengfanli/new-caffe-workplace/MTCNN_workplace/mtcnn-caffe_without_landmarks/prepare_data/WIDER_train/images"
pos_save_dir = "../mtcnn_rnet/24/positive"
part_save_dir = "../mtcnn_rnet/24/part"
neg_save_dir = '../mtcnn_rnet/24/negative'
save_dir = "../mtcnn_rnet/24"

ensure_directory_exists(save_dir)
ensure_directory_exists(pos_save_dir)
ensure_directory_exists(neg_save_dir)
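# A hedged illustration of one annotation row under the scheme above; the
# exact field layout and formatting are assumptions, since the writer code
# isn't shown here:

def format_annotation(image_name, cls_label, face_up_label, bbox_offsets):
    # Field order follows the comment above: image_name, cls_label,
    # face_up_label, bbox regression offsets.
    offsets = ' '.join('%.2f' % o for o in bbox_offsets)
    return '%s %d %d %s\n' % (image_name, cls_label, face_up_label, offsets)

# format_annotation('positive/0.jpg', 1, 1, (-0.05, 0.02, 0.11, -0.08))
# -> 'positive/0.jpg 1 1 -0.05 0.02 0.11 -0.08\n'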
def _get_site_filepath_by_date(site_abbrv, date):
    season = helpers.season_from_date(date)
    directory = f'{base_directory}/{season}/{site_abbrv}'
    utils.ensure_directory_exists(directory)
    return f'{directory}/{date}.json'
def box_scores_directory_from_date(date):
    _, month, _ = date.split('-')
    season = helpers.season_from_date(date)
    directory = f"{box_scores_base_directory}/{season}/{month}/{date}"
    utils.ensure_directory_exists(directory)
    return directory
def ensure_required_directories_exist():
    print("Checking required directories")
    ensure_directory_exists(RAW_CONFIG_DIR)
    ensure_directory_exists(LOG_PATH)
    ensure_directory_exists(BORG_CACHE_DIR)
    init_example_config()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('checkpoint')
    parser.add_argument('plot_output_directory')
    parser.add_argument('--n_samples', type=int, default=30)
    args = parser.parse_args()

    batch_size = 8
    direction = 'spec2labels'
    print('direction', direction)

    utils.ensure_directory_exists(args.plot_output_directory)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    audio_options = dict(
        spectrogram_type='LogarithmicFilteredSpectrogram',
        filterbank='LogarithmicFilterbank',
        num_channels=1,
        sample_rate=44100,
        frame_size=4096,
        fft_size=4096,
        hop_size=441 * 4,  # 25 fps
        num_bands=24,
        fmin=30,
        fmax=10000.0,
        fref=440.0,
        norm_filters=True,
        unique_filters=True,
        circular_shift=False,
        add=1.)
    context = dict(frame_size=1, hop_size=1, origin='center')
    base_directory = './data/maps_piano/data'

    print('loading checkpoint')
    checkpoint = torch.load(args.checkpoint)
    model = ReversibleModel(
        device=device,
        batch_size=batch_size,
        depth=5,
        ndim_tot=256,
        ndim_x=144,
        ndim_y=185,
        ndim_z=9,
        clamp=2,
        zeros_noise_scale=3e-2,  # very magic, much hack!
        y_noise_scale=3e-2)
    # print('model', model)
    model.to(device)
    model.load_state_dict(checkpoint)

    # instrument_filename = './splits/tiny-min/instruments'
    # fold_files = ['./splits/tiny-min/AkPnBcht_F']
    instrument_filename = './splits/maps-isolated-notes/instruments'
    # fold_files = ['./splits/maps-isolated-notes/AkPnBcht_F']
    fold_base = './splits/maps-isolated-notes'
    fold_filenames = [
        # 'AkPnBcht_F',
        # 'AkPnBsdf_F',
        # 'AkPnCGdD_F',
        # 'AkPnStgb_F',
        # 'SptkBGAm_F',
        # 'SptkBGCl_F',
        # 'StbgTGd2_F',
        # 'AkPnBcht_M',
        # 'AkPnBsdf_M',
        # 'AkPnCGdD_M',
        # 'AkPnStgb_M',
        # 'SptkBGAm_M',
        # 'SptkBGCl_M',
        # 'StbgTGd2_M',
        # 'AkPnBcht_P',
        # 'AkPnBsdf_P',
        # 'AkPnCGdD_P',
        # 'AkPnStgb_P',
        # 'SptkBGAm_P',
        # 'SptkBGCl_P',
        # 'StbgTGd2_P',
        'ENSTDkCl_F',
        # 'ENSTDkAm_F',
        # 'ENSTDkCl_M',
        # 'ENSTDkAm_M',
        # 'ENSTDkCl_P'
        # 'ENSTDkAm_P',
    ]

    fold_files = []
    for fold_filename in fold_filenames:
        fold_files.append(os.path.join(fold_base, fold_filename))

    for fold_file in fold_files:
        plot_fold(direction=direction,
                  base_directory=base_directory,
                  instrument_filename=instrument_filename,
                  context=context,
                  audio_options=audio_options,
                  batch_size=batch_size,
                  device=device,
                  model=model,
                  fold_file=fold_file,
                  n_samples=args.n_samples,
                  plot_output_directory=args.plot_output_directory)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('checkpoint')
    parser.add_argument('output_directory')
    args = parser.parse_args()

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    direction = 'spec2labels'
    print('direction', direction)

    n_epochs = 512
    meta_epoch = 12
    batch_size = 32
    gamma = 0.96

    model = ReversibleModel(
        device=device,
        batch_size=batch_size,
        depth=5,
        ndim_tot=256,
        ndim_x=144,
        ndim_y=185,
        ndim_z=9,
        clamp=2,
        zeros_noise_scale=3e-2,  # very magic, much hack!
        y_noise_scale=3e-2)
    model.to(device)

    print('loading checkpoint')
    checkpoint = torch.load(args.checkpoint)
    model.load_state_dict(checkpoint)

    audio_options = dict(
        spectrogram_type='LogarithmicFilteredSpectrogram',
        filterbank='LogarithmicFilterbank',
        num_channels=1,
        sample_rate=44100,
        frame_size=4096,
        fft_size=4096,
        hop_size=441 * 4,  # 25 fps
        num_bands=24,
        fmin=30,
        fmax=10000.0,
        fref=440.0,
        norm_filters=True,
        unique_filters=True,
        circular_shift=False,
        add=1.)
    context = dict(frame_size=1, hop_size=1, origin='center')

    print('loading data')
    base_directory = './data/maps_piano/data'
    fold_directory = './splits/maps-non-overlapping'

    utils.ensure_directory_exists(args.output_directory)
    for fold in ['train', 'valid', 'test']:
        fold_output_directory = os.path.join(args.output_directory, fold)
        if not os.path.exists(fold_output_directory):
            os.makedirs(fold_output_directory)
        print('fold', fold)
        print('fold_output_directory', fold_output_directory)

        sequences = get_dataset_individually(
            base_directory=base_directory,
            fold_filename=os.path.join(fold_directory, fold),
            instrument_filename=os.path.join(fold_directory, 'instruments'),
            context=context,
            audio_options=audio_options,
            clazz=Spec2MidiDataset)

        for sequence in sequences:
            print('sequence.audiofilename', sequence.audiofilename)
            print('sequence.midifilename', sequence.midifilename)
            output_filename = os.path.basename(sequence.audiofilename)
            output_filename = os.path.splitext(output_filename)[0]
            output_filename = os.path.join(fold_output_directory, output_filename + '.pkl')
            print('output_filename', output_filename)

            loader = DataLoader(SqueezingDataset(sequence),
                                batch_size=batch_size,
                                sampler=SequentialSampler(sequence),
                                drop_last=True)
            result = export(device, model, loader)
            result['audiofilename'] = sequence.audiofilename
            result['midifilename'] = sequence.midifilename
            torch.save(result, output_filename)
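# Each exported sequence is written with torch.save, so it can be read back
# with torch.load. A small usage sketch (the path below is made up):

import torch

result = torch.load('exports/test/MAPS_MUS-alb_se2_ENSTDkCl.pkl')
print(result['audiofilename'])
print(result['midifilename'])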
# imports added so this fragment is runnable on its own
import os
import torch
import pcn
from utils import ensure_directory_exists

device_id = 1
threshold = [0.2, 0.6, 0.6]

pnet = pcn.Pnet()
pnet.load_state_dict(torch.load("../pnet/pnet_190310_iter_1238000_.pth"))
pnet.eval()

rnet = pcn.Rnet()
rnet.load_state_dict(torch.load("../rnet/pnet_190312_iter_979000_.pth",
                                map_location=lambda storage, loc: storage))
rnet.eval()

EPS = 0.01
IMAGE_SIZE = 48
DEBUG = True
if DEBUG:
    target_image_dir = "plot_images"
    ensure_directory_exists(target_image_dir)

anno_file = "wider_face_train.txt"
im_dir = "/media/disk1/mengfanli/new-caffe-workplace/MTCNN_workplace/mtcnn-caffe_without_landmarks/prepare_data/WIDER_train/images"
pos_save_dir = "../onet/48/positive_rnet"
suspect_save_dir = "../onet/48/suspect_rnet"
neg_save_dir = '../onet/48/negative_rnet'
save_dir = "../onet/48"

ensure_directory_exists(save_dir)
ensure_directory_exists(pos_save_dir)
ensure_directory_exists(neg_save_dir)
ensure_directory_exists(suspect_save_dir)

f1 = open(os.path.join(save_dir, 'pos_rnet_48.txt'), 'w')
f2 = open(os.path.join(save_dir, 'neg_rnet_48.txt'), 'w')
def _contest_draftgroup_filepath_for_date(date, dgid):
    season = helpers.season_from_date(date)
    directory = f'{base_directory}/{season}/contests/{date}'
    utils.ensure_directory_exists(directory)
    return f'{directory}/{dgid}.json'
def _contest_info_filepath_for_date(date):
    season = helpers.season_from_date(date)
    directory = f'{base_directory}/{season}/contests/{date}'
    utils.ensure_directory_exists(directory)
    return f'{directory}/contest_info.json'
def _slate_player_info_filepath_for_date(slate_id, date):
    season = helpers.season_from_date(date)
    directory = f'{base_directory}/{season}/players/{date}'
    utils.ensure_directory_exists(directory)
    return f'{directory}/{slate_id}.json'
def _get_json_file_for_date_and_site(date, site_abbrv):
    season = helpers.season_from_date(date)
    directory = f'{base_directory}/{season}/json/{site_abbrv}'
    utils.ensure_directory_exists(directory)
    filepath = f'{directory}/{date}.json'
    return filepath