def gather_projections():
    '''
    Fetch the main page and save it under the date it was last updated.
    At the bottom of the page there's a dropdown for past predictions,
    which we can fall back on later if this data goes away.
    We then find the team links, fetch those pages, and save them as well.
    Each team gets its own directory, with files named by date, so we can
    handle different update times per team.
    '''
    page = requests.get(f"{base_url}/{base_extention}")
    tree = html.fromstring(page.content)
    updated_at = tree.xpath('//*[@id="intro"]/div/div[2]/div[1]/p')[0]
    time_info = updated_at.text.split(' ', 1)[1]
    updated_at_time = datetime.datetime.strptime(time_info,
                                                 "%b. %d, %Y, at %I:%M %p")
    time_string = updated_at_time.strftime('%Y-%m-%d')
    directory = f"data/fivethirtyeight/base"
    utils.ensure_directory_exists(directory)
    filename = f'{time_string}.html'
    filepath = f"{directory}/{filename}"
    with open(filepath, 'w') as f:
        f.write(page.text)
    gather_team_pages(tree, time_string, directory)

    return time_string  # the update date, so callers know which files were saved
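
# The utils.ensure_directory_exists helper used throughout these examples is
# not shown here; a minimal sketch (an assumption, not the original
# implementation) is a thin wrapper around os.makedirs:
import os

def ensure_directory_exists(directory):
    # create the directory (and any parents) if it does not already exist
    os.makedirs(directory, exist_ok=True)

# Hypothetical driver: gather_projections() returns the update date, so a
# caller knows which dated files were just written, e.g.
#   update_date = gather_projections()
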
def main():
    parser = argparse.ArgumentParser('prepare maps splits (+instruments)')
    parser.add_argument('base_dir', help='path to the maps_piano/data folder')
    args = parser.parse_args()

    current_directory = os.getcwd()

    # we change the cwd to 'base_dir', so 'base_dir' is not part
    # of the filename that ends up in the splitfiles
    # we'll change back, once we write the splitfiles
    os.chdir(args.base_dir)

    filenames = collect_all_filenames(synthnames)

    os.chdir(current_directory)
    out_dir = 'splits/maps-isolated-notes'
    utils.ensure_directory_exists(out_dir)
    for synthname, volumes in filenames.items():
        for volume, fns in volumes.items():
            with open(os.path.join(out_dir, '{}_{}'.format(synthname, volume)),
                      'w') as f:
                write_to_file(f, fns)

    with open(os.path.join(out_dir, 'instruments'), 'w') as f:
        for si, synthname in enumerate(sorted(synthnames)):
            f.write('{},{}\n'.format(synthname, si))
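
# Example invocation (the script filename is illustrative, not from the
# source):
#
#   python prepare_maps_splits.py ./data/maps_piano/data
#
# This writes splits/maps-isolated-notes/<synthname>_<volume> split files plus
# an 'instruments' file mapping each sorted synthname to an integer id.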
def _get_json_filepath_from_date(date):
    '''
    Helper so it's easy to know where the JSON data goes.
    The json subdirectory keeps it separate from the csv data,
    since rg has both.
    '''
    season = helpers.season_from_date(date)
    directory = f'{base_directory}/json/{season}'
    utils.ensure_directory_exists(directory)
    return f"{directory}/{date}.json"
def gather_salary_changes_by_date(date):
    print(f'Getting salary differences from SA for {date}')
    page = requests.get(base_url, params={'date': date})
    season = helpers.season_from_date(date)
    directory = f'{base_directory}/{season}/full'
    utils.ensure_directory_exists(directory)
    filename = f'{date}.html'
    filepath = f'{directory}/{filename}'
    with open(filepath, 'w') as f:
        f.write(page.text)
def gather_odds_by_date(date):
    logger.info(f'Gathering odds for {date}')
    url = odds_url % date
    page = requests.get(url)
    season = helpers.season_from_date(date)
    directory = f'{base_directory}/{season}'
    utils.ensure_directory_exists(directory)
    filename = f'{date}.html'
    filepath = f'{directory}/{filename}'
    with open(filepath, 'w') as f:
        f.write(page.text)
    return date
def main():
    parser = argparse.ArgumentParser('prepare maps splits (+instruments)')
    parser.add_argument('base_dir', help='path to the maps_piano/data folder')
    args = parser.parse_args()

    current_directory = os.getcwd()

    # we change the cwd to 'base_dir', so 'base_dir' is not part
    # of the filename that ends up in the splitfiles
    # we'll change back, once we write the splitfiles
    os.chdir(args.base_dir)

    train_pids = collect_all_piece_ids(train_synthnames)
    test_pids = collect_all_piece_ids(test_synthnames)

    print('len(train_pids)', len(train_pids))
    print('len(test_pids)', len(test_pids))

    train_filenames = collect_all_filenames(train_synthnames, train_pids - test_pids)
    test_filenames = collect_all_filenames(test_synthnames, test_pids)

    # this just selects the first from each synth as a 'validation' set
    valid_filenames = []
    for synthname_a in sorted(train_synthnames):
        for filename, synthname_b in sorted(train_filenames):
            if synthname_a == synthname_b:
                valid_filenames.append((filename, synthname_a))
                break

    print('len(train_filenames)', len(train_filenames))
    print('len(valid_filenames)', len(valid_filenames))
    print('len(test_filenames)', len(test_filenames))

    os.chdir(current_directory)

    out_dir = 'splits/maps-non-overlapping'
    utils.ensure_directory_exists(out_dir)

    with open(os.path.join(out_dir, 'train'), 'w') as f:
        write_to_file(f, train_filenames)

    with open(os.path.join(out_dir, 'valid'), 'w') as f:
        write_to_file(f, valid_filenames)

    with open(os.path.join(out_dir, 'test'), 'w') as f:
        write_to_file(f, test_filenames)

    with open(os.path.join(out_dir, 'instruments'), 'w') as f:
        all_synthnames = train_synthnames | test_synthnames
        for si, synthname in enumerate(sorted(all_synthnames)):
            f.write('{},{}\n'.format(synthname, si))
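
# write_to_file is not defined in this excerpt; a minimal sketch, assuming the
# entries are (filename, synthname) tuples as iterated above and that the
# output mirrors the comma-separated 'instruments' format:
def write_to_file(f, filenames):
    # one "<filename>,<synthname>" row per entry (assumed format)
    for filename, synthname in sorted(filenames):
        f.write('{},{}\n'.format(filename, synthname))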
def gather_games_for_season(season):
    '''
    Gather games by season, e.g. '18-19' or '19-20'. We key on the season
    string because the year in the basketball-reference url refers to the
    year the finals are played, and we want the directories to make that
    clear.
    '''
    _, end_year = season.split('-')
    year = int(f'20{end_year}')
    directory = f"{games_base_directory}/{season}"
    utils.ensure_directory_exists(directory)
    for month_num in range(1, 13):
        month_name = calendar.month_name[month_num].lower()
        schedule_url = f"https://www.basketball-reference.com/leagues/NBA_{year}_games-{month_name}.html"
        logger.debug(schedule_url)
        page = requests.get(schedule_url)
        filepath = f"{directory}/{f'{month_num:02}'}.html"
        with open(filepath, 'w') as f:
            f.write(page.text)
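# Illustrative call (triggers 12 HTTP requests to basketball-reference):
#   gather_games_for_season('18-19')
# saves the monthly schedule pages for the season ending in 2019 as
# 01.html .. 12.html under {games_base_directory}/18-19/.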
def main(config):

    # state alignments adjustment
    phone_align_folder = config.singing_phone_align_folder
    ending_silence = config.singing_ending_silence
    state_align_original_folder = config.singing_state_align_folder
    phone_name_pattern = config.singing_phone_name_pattern
    state_align_folder = config.merlin_state_align_folder
    utils.log_heading(0, 'adjusting state alignments')
    utils.ensure_directory_exists(state_align_folder)
    file_lengths = adjust_alignments(phone_align_folder, ending_silence,
                                     state_align_original_folder,
                                     phone_name_pattern, state_align_folder)

    # lf0 file generation
    source_f0_folder = config.singing_f0_folder
    target_lf0_folder = config.merlin_lf0_folder
    utils.log_heading(0, 'generating lf0 files')
    utils.ensure_directory_exists(target_lf0_folder)
    generate_lf0(source_f0_folder, target_lf0_folder, file_lengths)
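
# main(config) only reads attributes off the config object; a throwaway
# stand-in for local testing could be built with types.SimpleNamespace
# (attribute names come from the code above, values are placeholders):
import types

example_config = types.SimpleNamespace(
    singing_phone_align_folder='align/phone',
    singing_ending_silence=0.2,
    singing_state_align_folder='align/state',
    singing_phone_name_pattern=r'(.+)\.lab',
    merlin_state_align_folder='merlin/state_align',
    singing_f0_folder='f0',
    merlin_lf0_folder='merlin/lf0')
# main(example_config)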
def export_package(index_file, outdir, unpack=False):
    if os.path.exists(outdir):
        print('target directory already exists')
        exit(1)

    fl = FileList()
    with open(index_file, 'rb') as index:
        fl.ParseFromString(index.read())

    assets_dir = os.path.dirname(index_file)

    for f in fl.files:
        p = get_real_path(f)
        print('copy file', p)
        if unpack:
            target = os.path.join(outdir, f.url)
        else:
            target = os.path.join(outdir, p)
        # make sure the parent directory of the target file exists
        ensure_directory_exists(os.path.dirname(target))
        shutil.copyfile(os.path.join(assets_dir, p), target)

    shutil.copyfile(index_file, os.path.join(outdir, 'filelist'))
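
# Illustrative call (paths are placeholders): copy every file listed in the
# protobuf index into ./exported, keeping the packed on-disk layout:
#   export_package('assets/filelist', 'exported', unpack=False)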
def gather_team_pages(tree, time_string, directory):
    '''
    We can get the team pages from the links in the standings table
    of the main page that we've already fetched.
    '''
    links = tree.xpath('//*[@id="standings-table"]/tbody//a/@href')
    for link in links:
        team_url = f"{base_url}{link}"
        logger.debug(f'url for team not yet identified: {team_url}')
        page = requests.get(team_url)
        # look the team up in the db so we have the abbreviation
        team_tree = html.fromstring(page.content)
        team_name = team_tree.xpath(
            '//*[@id="team"]/div/div[1]/h1/span[1]/text()')[0]
        logger.debug(f'Found Team: {team_name}')
        team = actor.find_team_by_name(team_name)
        directory = f"{base_directory}/{team['abbrv']}"
        utils.ensure_directory_exists(directory)
        filename = f"{time_string}.html"
        filepath = f"{directory}/{filename}"
        logger.info(f"Saving {team['name']} FTE html to {filepath}")
        with open(filepath, 'w') as f:
            f.write(page.text)
import sys
sys.path.append('../12net')
import numpy as np
import cv2
import os
import numpy.random as npr
from utils import IoU
from utils import ensure_directory_exists
anno_file = "wider_face_train.txt"
im_dir = "WIDER_train/images"
pos_save_dir = "../48net/48/positive"
part_save_dir = "../48net/48/part"
neg_save_dir = '../48net/48/negative'
save_dir = "../48net/48"

ensure_directory_exists(save_dir)
ensure_directory_exists(pos_save_dir)
ensure_directory_exists(neg_save_dir)
ensure_directory_exists(part_save_dir)

f1 = open(os.path.join(save_dir, 'pos_48.txt'), 'w')
f2 = open(os.path.join(save_dir, 'neg_48.txt'), 'w')
f3 = open(os.path.join(save_dir, 'part_48.txt'), 'w')
with open(anno_file, 'r') as f:
    annotations = f.readlines()
num = len(annotations)
print "%d pics in total" % num
p_idx = 0  # positive
n_idx = 0  # negative
d_idx = 0  # dont care
idx = 0
import numpy as np
import cv2
import os
import numpy.random as npr
from utils import IoU, rotate_images
from utils import ensure_directory_exists

# image_name, cls_label, face_up_label, bbox
# cls_label:     [-1, 0, 1] -- 1: positive; 0: negative; -1: part, does not contribute
# face_up_label: [-1, 0, 1] -- 1: up; 0: down; -1: does not contribute

IMAGE_SIZE = 24
DEBUG = False
if DEBUG:
    target_image_dir = "plot_images"
    ensure_directory_exists(target_image_dir)
    
    negative_image_dir = "negative_plot_images"
    ensure_directory_exists(negative_image_dir)


anno_file = "wider_face_train.txt"
im_dir = "/media/disk1/mengfanli/new-caffe-workplace/MTCNN_workplace/mtcnn-caffe_without_landmarks/prepare_data/WIDER_train/images"
pos_save_dir = "../mtcnn_rnet/24/positive"
part_save_dir = "../mtcnn_rnet/24/part"
neg_save_dir = '../mtcnn_rnet/24/negative'
save_dir = "../mtcnn_rnet/24"

ensure_directory_exists(save_dir)
ensure_directory_exists(pos_save_dir)
ensure_directory_exists(neg_save_dir)
def _get_site_filepath_by_date(site_abbrv, date):
    season = helpers.season_from_date(date)
    directory = f'{base_directory}/{season}/{site_abbrv}'
    utils.ensure_directory_exists(directory)
    return f'{directory}/{date}.json'
def box_scores_directory_from_date(date):
    _, month, _ = date.split('-')
    season = helpers.season_from_date(date)
    directory = f"{box_scores_base_directory}/{season}/{month}/{date}"
    utils.ensure_directory_exists(directory)
    return directory
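# Illustrative call (the season segment is whatever helpers.season_from_date
# returns):
#   box_scores_directory_from_date('2020-01-15')
#   -> f'{box_scores_base_directory}/2019-20/01/2020-01-15'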
def ensure_required_directories_exist():
    print("Checking required directories")
    ensure_directory_exists(RAW_CONFIG_DIR)
    ensure_directory_exists(LOG_PATH)
    ensure_directory_exists(BORG_CACHE_DIR)
    init_example_config()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('checkpoint')
    parser.add_argument('plot_output_directory')
    parser.add_argument('--n_samples', type=int, default=30)
    args = parser.parse_args()
    batch_size = 8
    direction = 'spec2labels'
    print('direction', direction)
    utils.ensure_directory_exists(args.plot_output_directory)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    audio_options = dict(
        spectrogram_type='LogarithmicFilteredSpectrogram',
        filterbank='LogarithmicFilterbank',
        num_channels=1,
        sample_rate=44100,
        frame_size=4096,
        fft_size=4096,
        hop_size=441 * 4,  # 25 fps
        num_bands=24,
        fmin=30,
        fmax=10000.0,
        fref=440.0,
        norm_filters=True,
        unique_filters=True,
        circular_shift=False,
        add=1.)
    context = dict(frame_size=1, hop_size=1, origin='center')
    base_directory = './data/maps_piano/data'

    print('loading checkpoint')
    checkpoint = torch.load(args.checkpoint)
    model = ReversibleModel(
        device=device,
        batch_size=batch_size,
        depth=5,
        ndim_tot=256,
        ndim_x=144,
        ndim_y=185,
        ndim_z=9,
        clamp=2,
        zeros_noise_scale=3e-2,  # very magic, much hack!
        y_noise_scale=3e-2)
    # print('model', model)
    model.to(device)
    model.load_state_dict(checkpoint)

    # instrument_filename = './splits/tiny-min/instruments'
    # fold_files = ['./splits/tiny-min/AkPnBcht_F']
    instrument_filename = './splits/maps-isolated-notes/instruments'
    # fold_files = ['./splits/maps-isolated-notes/AkPnBcht_F']
    fold_base = './splits/maps-isolated-notes'
    fold_filenames = [
        # 'AkPnBcht_F',
        # 'AkPnBsdf_F',
        # 'AkPnCGdD_F',
        # 'AkPnStgb_F',
        # 'SptkBGAm_F',
        # 'SptkBGCl_F',
        # 'StbgTGd2_F',

        # 'AkPnBcht_M',
        # 'AkPnBsdf_M',
        # 'AkPnCGdD_M',
        # 'AkPnStgb_M',
        # 'SptkBGAm_M',
        # 'SptkBGCl_M',
        # 'StbgTGd2_M',

        # 'AkPnBcht_P',
        # 'AkPnBsdf_P',
        # 'AkPnCGdD_P',
        # 'AkPnStgb_P',
        # 'SptkBGAm_P',
        # 'SptkBGCl_P',
        # 'StbgTGd2_P',
        'ENSTDkCl_F',
        # 'ENSTDkAm_F',
        # 'ENSTDkCl_M',
        # 'ENSTDkAm_M',
        # 'ENSTDkCl_P'
        # 'ENSTDkAm_P',
    ]
    fold_files = []
    for fold_filename in fold_filenames:
        fold_files.append(os.path.join(fold_base, fold_filename))

    for fold_file in fold_files:
        plot_fold(direction=direction,
                  base_directory=base_directory,
                  instrument_filename=instrument_filename,
                  context=context,
                  audio_options=audio_options,
                  batch_size=batch_size,
                  device=device,
                  model=model,
                  fold_file=fold_file,
                  n_samples=args.n_samples,
                  plot_output_directory=args.plot_output_directory)
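
# Example invocation (the script filename and paths are illustrative):
#
#   python plot_samples.py checkpoints/model.pt plots/ --n_samples 30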
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('checkpoint')
    parser.add_argument('output_directory')
    args = parser.parse_args()

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    direction = 'spec2labels'
    print('direction', direction)

    n_epochs = 512
    meta_epoch = 12
    batch_size = 32
    gamma = 0.96

    model = ReversibleModel(
        device=device,
        batch_size=batch_size,
        depth=5,
        ndim_tot=256,
        ndim_x=144,
        ndim_y=185,
        ndim_z=9,
        clamp=2,
        zeros_noise_scale=3e-2,  # very magic, much hack!
        y_noise_scale=3e-2)
    model.to(device)

    print('loading checkpoint')
    checkpoint = torch.load(args.checkpoint)
    model.load_state_dict(checkpoint)

    audio_options = dict(
        spectrogram_type='LogarithmicFilteredSpectrogram',
        filterbank='LogarithmicFilterbank',
        num_channels=1,
        sample_rate=44100,
        frame_size=4096,
        fft_size=4096,
        hop_size=441 * 4,  # 25 fps
        num_bands=24,
        fmin=30,
        fmax=10000.0,
        fref=440.0,
        norm_filters=True,
        unique_filters=True,
        circular_shift=False,
        add=1.)
    context = dict(frame_size=1, hop_size=1, origin='center')

    print('loading data')
    base_directory = './data/maps_piano/data'
    fold_directory = './splits/maps-non-overlapping'

    utils.ensure_directory_exists(args.output_directory)

    for fold in ['train', 'valid', 'test']:
        fold_output_directory = os.path.join(args.output_directory, fold)
        if not os.path.exists(fold_output_directory):
            os.makedirs(fold_output_directory)

        print('fold', fold)
        print('fold_output_directory', fold_output_directory)

        sequences = get_dataset_individually(
            base_directory=base_directory,
            fold_filename=os.path.join(fold_directory, fold),
            instrument_filename=os.path.join(fold_directory, 'instruments'),
            context=context,
            audio_options=audio_options,
            clazz=Spec2MidiDataset)

        for sequence in sequences:
            print('sequence.audiofilename', sequence.audiofilename)
            print('sequence.midifilename', sequence.midifilename)
            output_filename = os.path.basename(sequence.audiofilename)
            output_filename = os.path.splitext(output_filename)[0]
            output_filename = os.path.join(fold_output_directory,
                                           output_filename + '.pkl')

            print('output_filename', output_filename)

            loader = DataLoader(SqueezingDataset(sequence),
                                batch_size=batch_size,
                                sampler=SequentialSampler(sequence),
                                drop_last=True)

            result = export(device, model, loader)
            result['audiofilename'] = sequence.audiofilename
            result['midifilename'] = sequence.midifilename
            torch.save(result, output_filename)
device_id = 1
threshold = [0.2, 0.6, 0.6]
pnet = pcn.Pnet()
pnet.load_state_dict(torch.load("../pnet/pnet_190310_iter_1238000_.pth"))
pnet.eval()

rnet = pcn.Rnet()
rnet.load_state_dict(torch.load("../rnet/pnet_190312_iter_979000_.pth", map_location=lambda storage, loc: storage))
rnet.eval()

EPS = 0.01
IMAGE_SIZE = 48
DEBUG = True
if DEBUG:
    target_image_dir = "plot_images"
    ensure_directory_exists(target_image_dir)

anno_file = "wider_face_train.txt"
im_dir = "/media/disk1/mengfanli/new-caffe-workplace/MTCNN_workplace/mtcnn-caffe_without_landmarks/prepare_data/WIDER_train/images"
pos_save_dir = "../onet/48/positive_rnet"
suspect_save_dir = "../onet/48/suspect_rnet"
neg_save_dir = '../onet/48/negative_rnet'
save_dir = "../onet/48"

ensure_directory_exists(save_dir)
ensure_directory_exists(pos_save_dir)
ensure_directory_exists(neg_save_dir)
ensure_directory_exists(suspect_save_dir)

f1 = open(os.path.join(save_dir, 'pos_rnet_48.txt'), 'w')
f2 = open(os.path.join(save_dir, 'neg_rnet_48.txt'), 'w')
def _contest_draftgroup_filepath_for_date(date, dgid):
    season = helpers.season_from_date(date)
    directory = f'{base_directory}/{season}/contests/{date}'
    utils.ensure_directory_exists(directory)
    return f'{directory}/{dgid}.json'
def _contest_info_filepath_for_date(date):
    season = helpers.season_from_date(date)
    directory = f'{base_directory}/{season}/contests/{date}'
    utils.ensure_directory_exists(directory)
    return f'{directory}/contest_info.json'
def _slate_player_info_filepath_for_date(slate_id, date):
    season = helpers.season_from_date(date)
    directory = f'{base_directory}/{season}/players/{date}'
    utils.ensure_directory_exists(directory)
    return f'{directory}/{slate_id}.json'
def _get_json_file_for_date_and_site(date, site_abbrv):
    season = helpers.season_from_date(date)
    directory = f'{base_directory}/{season}/json/{site_abbrv}'
    utils.ensure_directory_exists(directory)
    filepath = f'{directory}/{date}.json'
    return filepath
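
# Illustrative call (values are placeholders; the season segment comes from
# helpers.season_from_date): these helpers all create their target directory
# lazily and return a season-scoped path, e.g.
#   _get_json_file_for_date_and_site('2020-02-01', 'dk')
#   -> f'{base_directory}/2019-20/json/dk/2020-02-01.json'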