def convert_bakeoff2005_dataset(dataset):
    root = 'data/' + dataset
    make_sure_path_exists(root)
    make_sure_path_exists(root + '/raw')
    convert_file('data/bakeoff2005/{}_training.utf8'.format(dataset),
                 'data/{}/raw/train-all.txt'.format(dataset), True)
    convert_file('data/bakeoff2005/{}_test_gold.utf8'.format(dataset),
                 'data/{}/raw/test.txt'.format(dataset), False)
    split_train_dev(dataset)

def make_bmes(dataset='pku'):  # + tag
    path = 'data/' + dataset + '/'
    make_sure_path_exists(path + 'bmes')
    bmes_tag(path + 'raw/train.txt', path + 'bmes/train.txt')
    bmes_tag(path + 'raw/train-all.txt', path + 'bmes/train-all.txt')
    bmes_tag(path + 'raw/dev.txt', path + 'bmes/dev.txt')
    bmes_tag(path + 'raw/test.txt', path + 'bmes/test.txt')

def convert_sighan2005_dataset(dataset):
    root = 'data/' + dataset
    make_sure_path_exists(root)
    make_sure_path_exists(root + '/raw')
    convert_file('data/sighan2005/{}_training.utf8'.format(dataset),
                 'data/{}/raw/train-all.txt'.format(dataset), True)
    convert_file('data/sighan2005/{}_test_gold.utf8'.format(dataset),
                 'data/{}/raw/test.txt'.format(dataset), False)
    split_train_dev(dataset)

def make_bmes(dataset='pku', encode="utf-16"):
    path = 'data/' + dataset + '/'
    make_sure_path_exists(path + 'bmes')
    bmes_tag(path + 'raw/train.txt', path + 'bmes/train.txt', encode)
    bmes_tag(path + 'raw/train-all.txt', path + 'bmes/train-all.txt', encode)
    bmes_tag(path + 'raw/dev.txt', path + 'bmes/dev.txt', encode)
    bmes_tag(path + 'raw/test.txt', path + 'bmes/test.txt', encode)

def make_bmes(dataset='pku'):
    path = 'data/' + dataset + '/'
    make_sure_path_exists(path + 'bmes')
    bmes_tag(path + 'raw/train.txt', path + 'bmes/train.txt')
    bmes_tag(path + 'raw/train-all.txt', path + 'bmes/train-all.txt')
    bmes_tag(path + 'raw/dev.txt', path + 'bmes/dev.txt')
    bmes_tag(path + 'raw/test.txt', path + 'bmes/test.txt')

def converter(filepath, src, dst):
    """Convert a MIDI file to a multi-track piano-roll and save the
    resulting multi-track piano-roll to the destination directory. Return a
    tuple of `midi_md5` and useful information extracted from the MIDI file.
    """
    midi_md5 = os.path.splitext(os.path.basename(filepath))[0]
    multitrack = Multitrack(beat_resolution=CONFIG["beat_resolution"],
                            name=midi_md5)
    pm = pretty_midi.PrettyMIDI(filepath)

    # Merge tracks
    assert pm.instruments[0].name == "MELODY"
    assert pm.instruments[1].name == "BRIDGE"
    assert pm.instruments[2].name == "PIANO"
    pm.instruments[0].name = "MAIN"
    pm.instruments[0].notes = (
        pm.instruments[0].notes
        + pm.instruments[1].notes
        + pm.instruments[2].notes
    )
    del pm.instruments[2]
    del pm.instruments[1]

    multitrack.parse_pretty_midi(pm)
    midi_info = get_midi_info(pm)

    result_dir = change_prefix(os.path.dirname(filepath), src, dst)
    make_sure_path_exists(result_dir)
    multitrack.save(os.path.join(result_dir, midi_md5 + ".npz"))

    return (midi_md5, midi_info)

def main(): """Main function.""" result_dir, src, subset_ids_path = parse_args() id_lists = {tag: [] for tag in TAGS} # Load the IDs of the songs in the subset with open(subset_ids_path) as f: subset_ids = [line.rstrip('\n').split()[1] for line in f] # Loop over all the songs in the subsets for msd_id in subset_ids: for dataset in ('lastfm_train', 'lastfm_test'): filepath = os.path.join( src, dataset, msd_id_to_dirs(msd_id) + '.json') if os.path.exists(filepath): with open(filepath) as f: data = json.load(f) # Loop over all the tags annotated to the song for tag_freq_pair in data['tags']: if tag_freq_pair[0] in TAGS: # Add the ID to the corresponding tag id_lists[tag_freq_pair[0]].append(msd_id) # Save the ID lists to files make_sure_path_exists(result_dir) for tag in TAGS: filename = 'id_list_{}.txt'.format(tag.lower()) with open(os.path.join(result_dir, filename), 'w') as f: for msd_id in id_lists[tag]: f.write(msd_id + '\n') print("ID lists for Last.fm Dataset successfully saved.")
def main(): """Main function.""" result_dir, src, subset_ids_path = parse_args() id_lists = {tag: [] for tag in TAGS} # Load the IDs of the songs in the subset with open(subset_ids_path) as f: subset_ids = [line.rstrip('\n').split()[1] for line in f] # Loop over all the songs in the subsets for msd_id in subset_ids: for dataset in ('lastfm_train', 'lastfm_test'): filepath = os.path.join(src, dataset, msd_id_to_dirs(msd_id) + '.json') if os.path.exists(filepath): with open(filepath) as f: data = json.load(f) # Loop over all the tags annotated to the song for tag_freq_pair in data['tags']: if tag_freq_pair[0] in TAGS: # Add the ID to the corresponding tag id_lists[tag_freq_pair[0]].append(msd_id) # Save the ID lists to files make_sure_path_exists(result_dir) for tag in TAGS: filename = 'id_list_{}.txt'.format(tag.lower()) with open(os.path.join(result_dir, filename), 'w') as f: for msd_id in id_lists[tag]: f.write(msd_id + '\n') print("ID lists for Last.fm Dataset successfully saved.")
def galSaveflux(fList, fid, savedir):
    fileDir = os.path.join(savedir, "doublet_ML")
    make_sure_path_exists(fileDir)
    fileDir = os.path.join(fileDir, str(fid) + ".pkl")
    with open(fileDir, "wb") as f:
        pickle.dump(fList, f)

def make_bmes(dataset="pku"): path = data_path + "/" + dataset + "/" make_sure_path_exists(path + "bmes") bmes_tag(path + "raw/train.txt", path + "bmes/train.txt") bmes_tag(path + "raw/train-all.txt", path + "bmes/train-all.txt") bmes_tag(path + "raw/dev.txt", path + "bmes/dev.txt") bmes_tag(path + "raw/test.txt", path + "bmes/test.txt")
def plotGalaxyLens(doublet, obj, savedir, peak_candidates, preProd, nxtProd,
                   doublet_index, fit):
    if not doublet:
        ax = plt.subplot(1, 1, 1)
        plt.title('RA=' + str(obj.RA) + ', Dec=' + str(obj.DEC) + ', Plate=' +
                  str(obj.plate) + ', Fiber=' + str(obj.fiberid) + ', MJD=' +
                  str(obj.mjd) + '\n$z=' + str(obj.z) + ' \pm' +
                  str(obj.z_err) + '$, Class=' + str(obj.obj_class))
        ax.plot(obj.wave, obj.reduced_flux, 'k')
        plt.xlabel('$Wavelength\, (Angstroms)$')
        plt.ylabel('$f_{\lambda}\, (10^{-17} erg\, s^{-1} cm^{-2} Ang^{-1}$')
        ax.plot(obj.wave, fit, 'r')
        make_sure_path_exists(savedir + '/plots/')
        plt.savefig(savedir + '/plots/' + str(obj.plate) + '-' + str(obj.mjd) +
                    '-' + str(obj.fiberid) + '.png')
        plt.close()
    # If doublet, plot in two different windows
    else:
        # Plot currently inspecting spectra
        plt.figure(figsize=(14, 4))
        plt.suptitle('RA=' + str(obj.RA) + ', Dec=' + str(obj.DEC) +
                     ', Plate=' + str(obj.plate) + ', Fiber=' +
                     str(obj.fiberid) + ', MJD=' + str(obj.mjd) + '\n$z=' +
                     str(obj.z) + ' \pm' + str(obj.z_err) + '$, Class=' +
                     str(obj.obj_class))
        # Reduced flux overall
        ax1 = plt.subplot2grid((1, 3), (0, 0), colspan=2)
        ax1.plot(obj.wave[10:-10], obj.reduced_flux[10:-10], 'k')
        ax1.plot(obj.wave, fit, 'r')
        ax1.set_xlabel('$\lambda \, [\AA]$ ')
        ax1.set_ylabel(
            '$f_{\lambda}\, (10^{-17} erg\, s^{-1} cm^{-2} Ang^{-1}$')
        ax1.set_xlim([np.min(obj.wave), np.max(obj.wave)])
        # Reduced flux detail
        ax2 = plt.subplot2grid((1, 3), (0, 2))
        ax2.set_xlabel('$\lambda \, [\AA]$ ')
        ax2.locator_params(tight=True)
        ax2.set_xlim([peak_candidates[doublet_index].wavelength - 30.0,
                      peak_candidates[doublet_index].wavelength + 30.0])
        ax2.plot(obj.wave, obj.reduced_flux, 'k')
        ax2.plot(obj.wave, fit, 'r')
        ax2.set_ylim([-5, 10])
        ax2.vlines(x=obj.zline['linewave'] * (1.0 + obj.z), ymin=-10, ymax=10,
                   colors='g', linestyles='dashed')
        # Plot previous one
        if obj.fiberid != 1:
            objPre = SDSSObject(obj.plate, obj.mjd, obj.fiberid - 1,
                                obj.dataVersion, obj.baseDir)
            ax2.plot(objPre.wave, objPre.reduced_flux, 'b')
        # Plot next one
        if obj.fiberid != 1000:
            objNxt = SDSSObject(obj.plate, obj.mjd, obj.fiberid + 1,
                                obj.dataVersion, obj.baseDir)
            ax2.plot(objNxt.wave, objNxt.reduced_flux, 'g')
        # Save to file
        make_sure_path_exists(os.path.join(savedir, 'plots'))
        plt.savefig(os.path.join(savedir, 'plots', str(obj.plate) + '-' +
                                 str(obj.mjd) + '-' + str(obj.fiberid) +
                                 '.png'))
        plt.close()

def main(): """Main function.""" dst, src, fs, tempo = parse_args() make_sure_path_exists(os.path.dirname(dst)) multitrack = pypianoroll.Multitrack(src) pm = multitrack.to_pretty_midi(tempo) waveform = pm.fluidsynth() scipy.io.wavfile.write(dst, fs, waveform)
def convert_bakeoff2005_dataset(dataset):
    print('Converting {}...'.format(dataset))
    root = 'data/' + dataset
    make_sure_path_exists(root)
    make_sure_path_exists(root + '/raw')
    convert_file('data/bakeoff2005/{}_training.utf8'.format(dataset),
                 'data/{}/raw/train-all.txt'.format(dataset), True)
    convert_file('data/bakeoff2005/{}_test_gold.utf8'.format(dataset),
                 'data/{}/raw/test.txt'.format(dataset), False)
    split_train_dev(dataset)

def make_radical(dataset='pku'):
    print('Making radical tags for {}...'.format(dataset))
    path = 'data/' + dataset
    make_sure_path_exists(path + '/radical')
    to_radical(path + '/bmes/train.txt', path + '/radical/train.txt')
    to_radical(path + '/bmes/train-all.txt', path + '/radical/train-all.txt')
    to_radical(path + '/bmes/dev.txt', path + '/radical/dev.txt')
    to_radical(path + '/bmes/test.txt', path + '/radical/test.txt')

def make_bmes(dataset='pku'):
    print('Making bmes tags for {}...'.format(dataset))
    path = 'data/' + dataset + '/'
    make_sure_path_exists(path + 'bmes')
    bmes_tag(path + 'raw/train.txt', path + 'bmes/train.txt')
    bmes_tag(path + 'raw/train-all.txt', path + 'bmes/train-all.txt')
    bmes_tag(path + 'raw/dev.txt', path + 'bmes/dev.txt')
    bmes_tag(path + 'raw/test.txt', path + 'bmes/test.txt')

def convert_sighan2008_dataset(dataset, utf=16):
    root = 'data/' + dataset
    make_sure_path_exists(root)
    make_sure_path_exists(root + '/raw')
    convert_file(
        'data/sighan2008/{}_train_seg/{}_train_utf{}.seg'.format(
            dataset, dataset, utf),
        'data/{}/raw/train-all.txt'.format(dataset), True,
        'utf-{}'.format(utf))
    convert_file(
        'data/sighan2008/{}_seg_truth&resource/{}_truth_utf{}.seg'.format(
            dataset, dataset, utf),
        'data/{}/raw/test.txt'.format(dataset), False,
        'utf-{}'.format(utf))
    split_train_dev(dataset)

def plotGalaxyLens(doublet, obj, savedir, peak_candidates, preProd, nxtProd,
                   doublet_index, fit):
    if not doublet:
        ax = plt.subplot(1, 1, 1)
        plt.title('RA=' + str(obj.RA) + ', Dec=' + str(obj.DEC) + ', Plate=' +
                  str(obj.plate) + ', Fiber=' + str(obj.fiberid) + ', MJD=' +
                  str(obj.mjd) + '\n$z=' + str(obj.z) + ' \pm' +
                  str(obj.z_err) + '$, Class=' + str(obj.obj_class))
        ax.plot(obj.wave, obj.reduced_flux, 'k')
        plt.xlabel('$Wavelength\, (Angstroms)$')
        plt.ylabel('$f_{\lambda}\, (10^{-17} erg\, s^{-1} cm^{-2} Ang^{-1}$')
        ax.plot(obj.wave, fit, 'r')
        make_sure_path_exists(savedir + '/plots/')
        plt.savefig(savedir + '/plots/' + str(obj.plate) + '-' + str(obj.mjd) +
                    '-' + str(obj.fiberid) + '.png')
        plt.close()
    # If doublet, plot in two different windows
    else:
        # Plot currently inspecting spectra
        plt.figure(figsize=(14, 6))
        ax1 = plt.subplot2grid((1, 3), (0, 0), colspan=2)
        plt.suptitle('RA=' + str(obj.RA) + ', Dec=' + str(obj.DEC) +
                     ', Plate=' + str(obj.plate) + ', Fiber=' +
                     str(obj.fiberid) + ', MJD=' + str(obj.mjd) + '\n$z=' +
                     str(obj.z) + ' \pm' + str(obj.z_err) + '$, Class=' +
                     str(obj.obj_class))
        ax2 = plt.subplot2grid((1, 3), (0, 2))
        ax1.plot(obj.wave[10:-10], obj.reduced_flux[10:-10], 'k')
        ax1.plot(obj.wave, fit, 'r')
        ax1.set_xlabel('$\lambda \, [\AA]$ ')
        ax1.set_ylabel(
            '$f_{\lambda}\, (10^{-17} erg\, s^{-1} cm^{-2} Ang^{-1}$')
        ax2.set_xlabel('$\lambda \, [\AA]$ ')
        ax2.locator_params(tight=True)
        ax2.set_xlim([peak_candidates[doublet_index].wavelength - 30.0,
                      peak_candidates[doublet_index].wavelength + 30.0])
        ax2.plot(obj.wave, obj.reduced_flux, 'k')
        ax2.plot(obj.wave, fit, 'r')
        ax2.set_ylim([-5, 10])
        ax2.vlines(x=obj.zline['linewave'] * (1.0 + obj.z), ymin=-10, ymax=10,
                   colors='g', linestyles='dashed')
        ax1.set_xlim([np.min(obj.wave), np.max(obj.wave)])
        # Plot previous one
        if obj.fiberid != 1:
            objPre = SDSSObject(obj.plate, obj.mjd, obj.fiberid - 1,
                                obj.dataVersion, obj.baseDir)
            ax2.plot(objPre.wave, objPre.reduced_flux, 'b')
        # Plot next one
        if obj.fiberid != 1000:
            objNxt = SDSSObject(obj.plate, obj.mjd, obj.fiberid + 1,
                                obj.dataVersion, obj.baseDir)
            ax2.plot(objNxt.wave, objNxt.reduced_flux, 'g')
        # Save to file
        make_sure_path_exists(os.path.join(savedir, 'plots'))
        plt.savefig(os.path.join(savedir, 'plots', str(obj.plate) + '-' +
                                 str(obj.mjd) + '-' + str(obj.fiberid) +
                                 '.png'))
        plt.close()

def convert_sxu():
    dataset = 'sxu'
    print('Converting corpus {}'.format(dataset))
    root = 'data/' + dataset
    make_sure_path_exists(root)
    make_sure_path_exists(root + '/raw')
    convert_file('data/bakeoff2008/{}/train.txt'.format(dataset),
                 'data/{}/raw/train-all.txt'.format(dataset), True)
    convert_file('data/bakeoff2008/{}/test.txt'.format(dataset),
                 'data/{}/raw/test.txt'.format(dataset), False)
    split_train_dev(dataset)
    make_bmes(dataset)

def convert_sxu():
    dataset = 'sxu'
    print('Converting corpus {}'.format(dataset))
    root = 'data/' + dataset
    make_sure_path_exists(root)
    make_sure_path_exists(root + '/raw')
    convert_file('data/other/{}/train.txt'.format(dataset),
                 'data/{}/raw/train-all.txt'.format(dataset), True)
    convert_file('data/other/{}/test.txt'.format(dataset),
                 'data/{}/raw/test.txt'.format(dataset), False)
    split_train_dev(dataset)
    make_bmes(dataset)

def merger(filepath, src, dst):
    """Load and merge a multitrack pianoroll and save to the given path."""
    # Load and merge the multitrack pianoroll
    multitrack = Multitrack(filepath)
    merged = get_merged(multitrack)

    # Save the merged multitrack pianoroll
    result_path = change_prefix(filepath, src, dst)
    make_sure_path_exists(os.path.dirname(result_path))
    merged.save(result_path)

def lensFinder(plate, mjd, fiberid, datav, datadir, savedir, lya, qso, jpt,
               bwidth, bsig, maxchi2):
    sd = os.path.join(savedir, str(plate) + "-" + str(mjd))
    make_sure_path_exists(sd)
    try:
        eBOSSLens(plate, mjd, fiberid, datav, lya, qso, jpt, sd, datadir,
                  max_chi2=maxchi2, bwidth=bwidth, bsig=bsig)
    except Exception as reason:
        text = str(plate) + " " + str(mjd) + " " + str(fiberid) + " " + \
            str(reason)
        print(text)

def binarizer(filepath, src, dst):
    """Load and binarize a multitrack pianoroll and save the resulting
    multitrack pianoroll to the destination directory."""
    # Load and binarize the multitrack pianoroll
    multitrack = Multitrack(filepath)
    multitrack.binarize()

    # Save the binarized multitrack pianoroll
    result_path = change_prefix(filepath, src, dst)
    make_sure_path_exists(os.path.dirname(result_path))
    multitrack.save(result_path)

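# Note: the converter, merger and binarizer snippets above all call a
# change_prefix helper that is not included in this collection. Judging only
# from how it is used, it presumably swaps the leading `src` prefix of a path
# for `dst`; a minimal sketch under that assumption (not the projects' actual
# implementation):
import os


def change_prefix(path, src, dst):
    """Return `path` with its leading `src` prefix replaced by `dst`."""
    return os.path.join(dst, os.path.relpath(path, src))
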
def lensFinder(plate, mjd, fiberid, datav, datadir, savedir, lya, qso, jpt,
               bwidth, bsig, maxchi2, doplot):
    sd = os.path.join(savedir, str(plate) + "-" + str(mjd))
    make_sure_path_exists(sd)
    try:
        eBOSSLens(plate, mjd, fiberid, datav, lya, qso, jpt, sd, datadir,
                  max_chi2=maxchi2, bwidth=bwidth, bsig=bsig, doPlot=doplot)
    except Exception as reason:
        text = str(plate) + " " + str(mjd) + " " + str(fiberid) + " " + \
            str(reason)
        print(text)

def convert_wiki():
    dataset = 'wiki'
    print('Converting corpus {}'.format(dataset))
    root = 'data/' + dataset
    make_sure_path_exists(root)
    make_sure_path_exists(root + '/raw')
    convert_file('data/wiki/generated.train.txt',
                 'data/{}/raw/train.txt'.format(dataset), True)
    convert_file('data/wiki/generated.dev.txt',
                 'data/{}/raw/dev.txt'.format(dataset), True)
    convert_file('data/wiki/generated.test.txt',
                 'data/{}/raw/test.txt'.format(dataset), False)
    combine_files('data/{}/raw/train.txt'.format(dataset),
                  'data/{}/raw/dev.txt'.format(dataset),
                  'data/{}/raw/train-all.txt'.format(dataset))
    make_bmes(dataset)

def make_training_examples():
    # Note: originally written for Python 2 (print statement, xrange);
    # updated here to Python 3 syntax for consistency with the other snippets.
    for i in range(NUM_TRAINING_EXAMPLES):
        print("Training: {0}/{1}".format(i + 1, NUM_TRAINING_EXAMPLES))
        category = random.choice(categories)
        audio_segments_dir = '{0}/{1}/train'.format(AUDIO_SEGMENTS_DIR,
                                                    category)
        segment_wav_file = '{0}/{1}'.format(
            audio_segments_dir, random.choice(audio_segments[category]))
        example_dir = '{0}/{1}'.format(TRAINING_EXAMPLES_DIR, category)
        make_sure_path_exists(example_dir)
        example_file_prefix = '{0}/{1}'.format(example_dir, i)
        extract_random_augmented_spectrogram(segment_wav_file,
                                             example_file_prefix)

def convert_ctb():
    dataset = 'ctb'
    print('Converting corpus {}'.format(dataset))
    root = 'data/' + dataset
    make_sure_path_exists(root)
    make_sure_path_exists(root + '/raw')
    convert_file('data/ctb/ctb6.train.seg',
                 'data/{}/raw/train.txt'.format(dataset), True)
    convert_file('data/ctb/ctb6.dev.seg',
                 'data/{}/raw/dev.txt'.format(dataset), True)
    convert_file('data/ctb/ctb6.test.seg',
                 'data/{}/raw/test.txt'.format(dataset), False)
    combine_files('data/{}/raw/train.txt'.format(dataset),
                  'data/{}/raw/dev.txt'.format(dataset),
                  'data/{}/raw/train-all.txt'.format(dataset))
    make_bmes(dataset)

def main(): """Main function.""" src, dst = parse_args() make_sure_path_exists(dst) if CONFIG['multicore'] > 1: joblib.Parallel(n_jobs=CONFIG['multicore'], verbose=5)( joblib.delayed(merger)(npz_path, src, dst) for npz_path in findall_endswith('.npz', src)) else: for npz_path in findall_endswith('.npz', src): merger(npz_path, src, dst)
def convert_ctb():
    dataset = 'ctb'
    print('Converting corpus {}'.format(dataset))
    root = 'data/' + dataset
    make_sure_path_exists(root)
    make_sure_path_exists(root + '/raw')
    convert_file('data/other/ctb/ctb6.train.seg',
                 'data/{}/raw/train.txt'.format(dataset), True)
    convert_file('data/other/ctb/ctb6.dev.seg',
                 'data/{}/raw/dev.txt'.format(dataset), True)
    convert_file('data/other/ctb/ctb6.test.seg',
                 'data/{}/raw/test.txt'.format(dataset), False)
    combine_files('data/{}/raw/train.txt'.format(dataset),
                  'data/{}/raw/dev.txt'.format(dataset),
                  'data/{}/raw/train-all.txt'.format(dataset))
    make_bmes(dataset)

def convert_weibo():
    dataset = 'weibo'
    print('Converting corpus {}'.format(dataset))
    root = 'data/' + dataset
    make_sure_path_exists(root)
    make_sure_path_exists(root + '/raw')
    convert_file('data/weibo/nlpcc2016-word-seg-train.dat',
                 'data/{}/raw/train.txt'.format(dataset), True)
    convert_file('data/weibo/nlpcc2016-wordseg-dev.dat',
                 'data/{}/raw/dev.txt'.format(dataset), True)
    # TODO the weibo test answer is missing
    convert_file('data/weibo/nlpcc2016-wordseg-dev.dat',
                 'data/{}/raw/test.txt'.format(dataset), False)
    combine_files('data/{}/raw/train.txt'.format(dataset),
                  'data/{}/raw/dev.txt'.format(dataset),
                  'data/{}/raw/train-all.txt'.format(dataset))
    make_bmes(dataset)

def convert_synthetic_corpus():
    dataset = 'syn'
    print('Converting corpus {}'.format(dataset))
    root = 'data/' + dataset
    make_sure_path_exists(root)
    make_sure_path_exists(root + '/raw')
    split_train_dev_test('syn')
    convert_file('data/syn/train.txt',
                 'data/{}/raw/train.txt'.format(dataset), True)
    convert_file('data/syn/dev.txt',
                 'data/{}/raw/dev.txt'.format(dataset), True)
    convert_file('data/syn/test.txt',
                 'data/{}/raw/test.txt'.format(dataset), False)
    combine_files('data/{}/raw/train.txt'.format(dataset),
                  'data/{}/raw/dev.txt'.format(dataset),
                  'data/{}/raw/train-all.txt'.format(dataset))
    make_bmes(dataset)

def main(): """Main function.""" src, dst = parse_args() make_sure_path_exists(dst) if CONFIG['multicore'] > 1: joblib.Parallel(n_jobs=CONFIG['multicore'], verbose=5)( joblib.delayed(binarizer)(npz_path, src, dst) for npz_path in findall_endswith('.npz', src)) else: for npz_path in findall_endswith('.npz', src): binarizer(npz_path, src, dst) print("Dataset successfully binarized.")
def process(sources, output, force):
    """Download sources and process the file to the output directory.

    \b
    SOURCES: Source JSON file or directory of files. Required.
    OUTPUT: Destination directory for generated data. Required.
    """
    for path in utils.get_files(sources):
        pathparts = utils.get_path_parts(path)
        pathparts[0] = output.strip(os.sep)
        pathparts[-1] = pathparts[-1].replace('.json', '.geojson')

        outdir = os.sep.join(pathparts[:-1])
        outfile = os.sep.join(pathparts)

        source = utils.read_json(path)
        urlfile = urlparse(source['url']).path.split('/')[-1]

        if not hasattr(adapters, source['filetype']):
            utils.error('Unknown filetype', source['filetype'], '\n')
            continue

        if os.path.isfile(outfile) and not force:
            utils.error('Skipping', path, 'since generated file exists.',
                        'Use --force to regenerate.', '\n')
            continue

        utils.info('Downloading', source['url'])
        try:
            fp = utils.download(source['url'])
        except IOError:
            utils.error('Failed to download', source['url'], '\n')
            continue

        utils.info('Reading', urlfile)
        try:
            geojson = getattr(adapters, source['filetype']).read(
                fp, source['properties'])
        except IOError:
            utils.error('Failed to read', urlfile)
            continue
        finally:
            os.remove(fp.name)

        utils.make_sure_path_exists(outdir)
        utils.write_json(outfile, geojson)
        utils.success('Done. Processed to', outfile, '\n')

def init_logger():
    log_formatter = logging.Formatter("%(message)s")
    logger = logging.getLogger()
    console_handler = logging.StreamHandler()
    console_handler.setFormatter(log_formatter)
    logger.addHandler(console_handler)
    logger.setLevel(logging.INFO)

    root_dir = FLAGS.model_dir if FLAGS.model_dir != '' else FLAGS.load_dir
    make_sure_path_exists(root_dir)
    if not os.path.exists(root_dir):
        os.mkdir(root_dir)
    file_handler = logging.FileHandler("{0}/info.log".format(root_dir),
                                       mode='a')
    file_handler.setFormatter(log_formatter)
    logger.addHandler(file_handler)
    return logger

def convert_sighan2005_dataset(dataset):
    global sighan05_root
    root = os.path.join(data_path, dataset)
    make_sure_path_exists(root)
    make_sure_path_exists(root + "/raw")
    file_path = "{}/{}_training.utf8".format(sighan05_root, dataset)
    convert_file(file_path, "{}/raw/train-all.txt".format(root),
                 is_traditional(dataset), True)
    if dataset == "as":
        file_path = "{}/{}_testing_gold.utf8".format(sighan05_root, dataset)
    else:
        file_path = "{}/{}_test_gold.utf8".format(sighan05_root, dataset)
    convert_file(file_path, "{}/raw/test.txt".format(root),
                 is_traditional(dataset), False)
    split_train_dev(dataset)

def convert_conll(dataset):
    print('Converting corpus {}'.format(dataset))
    root = 'data/' + dataset
    make_sure_path_exists(root)
    make_sure_path_exists(root + '/raw')
    extract_conll('data/other/{}/dev.conll'.format(dataset),
                  'data/{}/dev.txt'.format(dataset))
    extract_conll('data/other/{}/test.conll'.format(dataset),
                  'data/{}/test.txt'.format(dataset))
    extract_conll('data/other/{}/train.conll'.format(dataset),
                  'data/{}/train.txt'.format(dataset))
    convert_file('data/{}/train.txt'.format(dataset),
                 'data/{}/raw/train.txt'.format(dataset), True)
    convert_file('data/{}/dev.txt'.format(dataset),
                 'data/{}/raw/dev.txt'.format(dataset), True)
    convert_file('data/{}/test.txt'.format(dataset),
                 'data/{}/raw/test.txt'.format(dataset), False)
    combine_files('data/{}/raw/train.txt'.format(dataset),
                  'data/{}/raw/dev.txt'.format(dataset),
                  'data/{}/raw/train-all.txt'.format(dataset))
    make_bmes(dataset)

def convert_cncorpus():
    dataset = 'cnc'
    print('Converting corpus {}'.format(dataset))
    root = 'data/' + dataset
    make_sure_path_exists(root)
    make_sure_path_exists(root + '/raw')
    remove_pos('data/other/cnc/train.txt', 'data/cnc/train-no-pos.txt')
    remove_pos('data/other/cnc/dev.txt', 'data/cnc/dev-no-pos.txt')
    remove_pos('data/other/cnc/test.txt', 'data/cnc/test-no-pos.txt')
    convert_file('data/cnc/train-no-pos.txt',
                 'data/{}/raw/train.txt'.format(dataset), True)
    convert_file('data/cnc/dev-no-pos.txt',
                 'data/{}/raw/dev.txt'.format(dataset), True)
    convert_file('data/cnc/test-no-pos.txt',
                 'data/{}/raw/test.txt'.format(dataset), False)
    combine_files('data/{}/raw/train.txt'.format(dataset),
                  'data/{}/raw/dev.txt'.format(dataset),
                  'data/{}/raw/train-all.txt'.format(dataset))
    make_bmes(dataset)

def convert_zhuxian():
    dataset = 'zx'
    print('Converting corpus {}'.format(dataset))
    root = 'data/' + dataset
    make_sure_path_exists(root)
    make_sure_path_exists(root + '/raw')
    remove_pos('data/other/zx/dev.zhuxian.wordpos', 'data/zx/dev.txt', '_')
    remove_pos('data/other/zx/train.zhuxian.wordpos', 'data/zx/train.txt', '_')
    remove_pos('data/other/zx/test.zhuxian.wordpos', 'data/zx/test.txt', '_')
    convert_file('data/zx/train.txt',
                 'data/{}/raw/train.txt'.format(dataset), True)
    convert_file('data/zx/dev.txt',
                 'data/{}/raw/dev.txt'.format(dataset), True)
    convert_file('data/zx/test.txt',
                 'data/{}/raw/test.txt'.format(dataset), False)
    combine_files('data/{}/raw/train.txt'.format(dataset),
                  'data/{}/raw/dev.txt'.format(dataset),
                  'data/{}/raw/train-all.txt'.format(dataset))
    make_bmes(dataset)

def convert_conll(dataset):
    print('Converting corpus {}'.format(dataset))
    root = 'data/' + dataset
    make_sure_path_exists(root)
    make_sure_path_exists(root + '/raw')
    extract_conll('data/{}/dev.conll'.format(dataset),
                  'data/{}/dev.txt'.format(dataset))
    extract_conll('data/{}/test.conll'.format(dataset),
                  'data/{}/test.txt'.format(dataset))
    extract_conll('data/{}/train.conll'.format(dataset),
                  'data/{}/train.txt'.format(dataset))
    convert_file('data/{}/train.txt'.format(dataset),
                 'data/{}/raw/train.txt'.format(dataset), True)
    convert_file('data/{}/dev.txt'.format(dataset),
                 'data/{}/raw/dev.txt'.format(dataset), True)
    convert_file('data/{}/test.txt'.format(dataset),
                 'data/{}/raw/test.txt'.format(dataset), False)
    combine_files('data/{}/raw/train.txt'.format(dataset),
                  'data/{}/raw/dev.txt'.format(dataset),
                  'data/{}/raw/train-all.txt'.format(dataset))
    make_bmes(dataset)

def convert_cncorpus():
    dataset = 'cnc'
    print('Converting corpus {}'.format(dataset))
    root = 'data/' + dataset
    make_sure_path_exists(root)
    make_sure_path_exists(root + '/raw')
    remove_pos('data/cnc/train.txt', 'data/cnc/train-no-pos.txt')
    remove_pos('data/cnc/dev.txt', 'data/cnc/dev-no-pos.txt')
    remove_pos('data/cnc/test.txt', 'data/cnc/test-no-pos.txt')
    convert_file('data/cnc/train-no-pos.txt',
                 'data/{}/raw/train.txt'.format(dataset), True)
    convert_file('data/cnc/dev-no-pos.txt',
                 'data/{}/raw/dev.txt'.format(dataset), True)
    convert_file('data/cnc/test-no-pos.txt',
                 'data/{}/raw/test.txt'.format(dataset), False)
    combine_files('data/{}/raw/train.txt'.format(dataset),
                  'data/{}/raw/dev.txt'.format(dataset),
                  'data/{}/raw/train-all.txt'.format(dataset))
    make_bmes(dataset)

def convert_zhuxian():
    dataset = 'zx'
    print('Converting corpus {}'.format(dataset))
    root = 'data/' + dataset
    make_sure_path_exists(root)
    make_sure_path_exists(root + '/raw')
    remove_pos('data/zx/dev.zhuxian.wordpos', 'data/zx/dev.txt', '_')
    remove_pos('data/zx/train.zhuxian.wordpos', 'data/zx/train.txt', '_')
    remove_pos('data/zx/test.zhuxian.wordpos', 'data/zx/test.txt', '_')
    convert_file('data/zx/train.txt',
                 'data/{}/raw/train.txt'.format(dataset), True)
    convert_file('data/zx/dev.txt',
                 'data/{}/raw/dev.txt'.format(dataset), True)
    convert_file('data/zx/test.txt',
                 'data/{}/raw/test.txt'.format(dataset), False)
    combine_files('data/{}/raw/train.txt'.format(dataset),
                  'data/{}/raw/dev.txt'.format(dataset),
                  'data/{}/raw/train-all.txt'.format(dataset))
    make_bmes(dataset)

def main(): """Main function.""" src, dst, id_list_path = parse_args() make_sure_path_exists(dst) with open(id_list_path) as f: id_list = [line.split() for line in f] if CONFIG['multicore'] > 1: joblib.Parallel(n_jobs=CONFIG['multicore'], verbose=5)( joblib.delayed(collector)(midi_md5, msd_id, src, dst) for midi_md5, msd_id in id_list) else: for midi_md5, msd_id in id_list: collector(midi_md5, msd_id, src, dst) print("Subset successfully collected for: {}".format(id_list_path))
def export_gen_graph(tf, sess, variables_filter, variables_bias,
                     variables_scalars, path, name="gen_export.pb",
                     width=224, ratio=1.0):
    var_gen_filter_new = []
    for i in range(len(variables_filter)):
        var_gen_filter_new.append(sess.run(variables_filter[i]))
    var_gen_bias_new = []
    for i in range(len(variables_bias)):
        var_gen_bias_new.append(sess.run(variables_bias[i]))
    var_gen_scalars_new = []
    for i in range(len(variables_scalars)):
        var_gen_scalars_new.append(sess.run(variables_scalars[i]))

    to_graph = tf.Graph()
    with to_graph.as_default() as g:
        gn.build_gen_graph_deep(tf, trainable=False,
                                variables_gen_filter=var_gen_filter_new,
                                variables_gen_bias=var_gen_bias_new,
                                variables_scalars=var_gen_scalars_new,
                                width_res=width, ratio=ratio)
        # saver = tf.train.Saver(tf.all_variables())
        utils.make_sure_path_exists(
            conf.project_path + conf.output_generator + path)
        with tf.Session() as new_sess:
            init = tf.global_variables_initializer()
            new_sess.run(init)
            # summary_writer = tf.train.SummaryWriter(
            #     project_path + log_generator, graph_def=new_sess.graph_def)
            # saver.save(new_sess, project_path + "\\android_exports" + path + name)
            tf.train.write_graph(tf.get_default_graph(),
                                 conf.project_path + conf.output_generator
                                 + path,
                                 name, as_text=False)

def plot_Jackpot(obj, peak, em_lines, savedir, counter):
    '''
    jptSave.plot_Jackpot(obj, peak, em_lines, savedir, counter)
    =========================================================
    Plots Jackpot lens candidates

    Parameters:
        obj: The SDSS object/spectra on which applied the subtraction
        peak_candidates: The inquired peaks
        savedir: Directory to save the plots/data
        em_lines: Rest frame ELG emission lines
        counter: To keep track of each candidates per spectra
    Returns:
        - Nothing. Create and save the plot.
    '''
    fontP = FontProperties()
    fontP.set_size('medium')
    plt.suptitle(SDSSname(obj.RA, obj.DEC) + '\n' + 'RA=' + str(obj.RA) +
                 ', Dec=' + str(obj.DEC) + ', $z_{QSO}=' +
                 '{:03.3}'.format(obj.z) + '$')
    gs = gridspec.GridSpec(1, 4)
    p1 = plt.subplot(gs[0, :4])
    smoothed_flux = np.array([np.mean(obj.flux[ii - 2:ii + 3])
                              for ii in range(len(obj.flux))
                              if (ii > 4 and ii < len(obj.flux) - 4)])
    p1.plot(obj.wave[5:-4], smoothed_flux, 'k', label='BOSS Flux',
            drawstyle='steps-mid')
    # p1.plot(wave, flux, 'k', label='BOSS Flux')
    p1.plot(obj.wave, obj.synflux, 'r', label='PCA fit')
    box = p1.get_position()
    p1.set_position([box.x0, box.y0 + 0.02, box.width * 0.9, box.height])
    p1.set_ylim(np.min(obj.synflux) - 3, np.max(obj.synflux) + 3)
    p1.vlines(x=em_lines * (1 + peak.z_1), ymin=-100, ymax=100, colors='g',
              linestyles='dashed')
    p1.vlines(x=em_lines * (1 + peak.z_2), ymin=-100, ymax=100, colors='b',
              linestyles='dashed')
    p1.legend(loc='upper right', bbox_to_anchor=(1.2, 1), ncol=1, prop=fontP)
    p1.set_xlim(3500, 10500)
    plt.ylabel('Flux [$10^{-17} erg\, s^{-1} cm^{-2} \AA^{-1}]$')
    make_sure_path_exists(savedir + '/plots/')
    plt.savefig(savedir + '/plots/' + SDSSname(obj.RA, obj.DEC) + '-' +
                str(obj.plate) + '-' + str(obj.mjd) + '-' + str(obj.fiberid) +
                '-' + str(counter) + '.png')
    plt.close()

def split(videos_dict):
    """For each video in each category, split its audio file into minute
    segments. For each minute segment, extract the first 50 seconds into an
    audio file to be used for training data, and the last 10 seconds into an
    audio file to be used for validation data.
    """
    for category in videos_dict:
        for url in videos_dict[category]:
            video_id = url
            video_wav_file = '{0}/{1}/{2}/audio.wav'.format(
                AUDIO_DIR, category, video_id)
            train_segments_dir = '{0}/{1}/train'.format(AUDIO_SEGMENTS_DIR,
                                                        category)
            val_segments_dir = '{0}/{1}/val'.format(AUDIO_SEGMENTS_DIR,
                                                    category)
            make_sure_path_exists(train_segments_dir)
            make_sure_path_exists(val_segments_dir)
            split_segments(video_wav_file, video_id, train_segments_dir,
                           val_segments_dir)

def main(): """Main function.""" result_dir, src, subset_ids_path = parse_args() # Parse the label of each song id_label_masd = {} with open(src) as f: for line in f: if line.startswith('#'): continue id_label_masd[line.split()[0]] = LABEL_NUM_MAP[line.split()[1]] # Load the IDs of the songs in the subset with open(subset_ids_path) as f: subset_ids = [line.rstrip('\n').split()[1] for line in f] # Loop over all the songs in the subset collected = {} for msd_id in subset_ids: label = id_label_masd.get(msd_id) if label is None: continue collected[msd_id] = label # Save the ID label pairs to a file make_sure_path_exists(result_dir) filepath = os.path.join(result_dir, 'masd_labels.txt') with open(filepath, 'w') as f: f.write("# msd_id, label_num\n") for msd_id in collected: f.write("{} {}\n".format(msd_id, collected[msd_id])) print("Labels successfully saved.") # Save the cleansed ID label pairs to a file cleansed = {} for msd_id in collected: if collected[msd_id] in CLEANSED_LABELS: cleansed[msd_id] = CLEANSED_LABELS.index(collected[msd_id]) filepath = os.path.join(result_dir, 'masd_labels_cleansed.txt') with open(filepath, 'w') as f: f.write("# msd_id, label_num\n") for msd_id in cleansed: f.write("{} {}\n".format(msd_id, cleansed[msd_id])) print("Cleansed labels successfully saved.")
def converter(filepath, src, dst): """Convert a MIDI file to a multi-track piano-roll and save the resulting multi-track piano-roll to the destination directory. Return a tuple of `midi_md5` and useful information extracted from the MIDI file. """ try: midi_md5 = os.path.splitext(os.path.basename(filepath))[0] multitrack = Multitrack(beat_resolution=CONFIG['beat_resolution'], name=midi_md5) pm = pretty_midi.PrettyMIDI(filepath) multitrack.parse_pretty_midi(pm) midi_info = get_midi_info(pm) result_dir = change_prefix(os.path.dirname(filepath), src, dst) make_sure_path_exists(result_dir) multitrack.save(os.path.join(result_dir, midi_md5 + '.npz')) return (midi_md5, midi_info) except: return None
def main(): """Main function.""" result_dir, src, subset_ids_path = parse_args() # Parse the label of each song tag_dict = {} with open(src) as f: for line in f: if line.startswith('#'): continue elif len(line.split()) == 2: tag_dict[line.split()[0]] = line.split()[1] elif len(line.split()) > 2: tag_dict[line.split()[0]] = '-'.join(line.split()[1:]) tags = set(tag_dict.values()) id_lists = {tag: [] for tag in tags} # Load the IDs of the songs in the subset with open(subset_ids_path) as f: subset_ids = [line.rstrip('\n').split()[1] for line in f] # Loop over all the songs in the subset for msd_id in subset_ids: tag = tag_dict.get(msd_id) if tag is None: continue # Add the ID to the corresponding tag id_lists[tag].append(msd_id) # Save the ID lists to files make_sure_path_exists(result_dir) for tag in tags: filename = 'id_list_{}.txt'.format(tag) with open(os.path.join(result_dir, filename), 'w') as f: for msd_id in id_lists[tag]: f.write(msd_id + '\n') print("ID lists for Million Song Dataset Benchmarks successfully saved.")
def _create_directories(output_path):
    distribution_path = os.path.join(output_path, 'distributions')
    two_d_interaction_path = os.path.join(output_path, '2d_interactions')
    three_d_interaction_path = os.path.join(output_path, '3d_interactions')
    make_sure_path_exists(distribution_path)
    make_sure_path_exists(two_d_interaction_path)
    make_sure_path_exists(three_d_interaction_path)
    return distribution_path, two_d_interaction_path, three_d_interaction_path

def main(): """Main function.""" src, dst, midi_info_path = parse_args() make_sure_path_exists(dst) midi_info = {} if CONFIG['multicore'] > 1: kv_pairs = joblib.Parallel(n_jobs=CONFIG['multicore'], verbose=5)( joblib.delayed(converter)(midi_path, src, dst) for midi_path in findall_endswith('.mid', src)) for kv_pair in kv_pairs: if kv_pair is not None: midi_info[kv_pair[0]] = kv_pair[1] else: for midi_path in findall_endswith('.mid', src): kv_pair = converter(midi_path, src, dst) if kv_pair is not None: midi_info[kv_pair[0]] = kv_pair[1] if midi_info_path is not None: with open(midi_info_path, 'w') as f: json.dump(midi_info, f) print("{} files have been successfully converted".format(len(midi_info)))
def plotQSOGal(obj, peak, savedir, em_lines, n):
    '''
    qsoSave.plotQSOGal(obj, peak_candidates, savedir)
    ====================================
    Parameters:
        obj: inspected spectra
        peak: Inquired peak on the spectra
        savedir: Directory to save the plots and info
        em_lines: rest frame ELG emission lines
        n: numbering of the plots
    Returns:
        - Nothing. Prints peak info and save plots
    '''
    make_sure_path_exists(savedir + '/plots/')
    z_backgal = peak.redshift
    fontP = FontProperties()
    fontP.set_size('medium')
    plt.suptitle(SDSSname(obj.RA, obj.DEC) + '\n' + 'RA=' + str(obj.RA) +
                 ', Dec=' + str(obj.DEC) + ', $z_{QSO}=' +
                 '{:03.3}'.format(obj.z) + '$')

    gs = gridspec.GridSpec(2, 4)
    p1 = plt.subplot(gs[0, :4])
    smoothed_flux = np.array([np.mean(obj.flux[ii - 2:ii + 3])
                              for ii in range(len(obj.flux))
                              if (ii > 4 and ii < len(obj.flux) - 4)])
    p1.plot(obj.wave[5:-4], smoothed_flux, 'k', label='BOSS Flux',
            drawstyle='steps-mid')
    p1.plot(obj.wave, obj.synflux, 'r', label='PCA fit')
    # if z < 1 and show == True:
    #     p1.plot(HB_wave, lorentz(HB_wave, params_beta[0], params_beta[1],
    #             params_beta[2]) + HB_wave*line_coeff[0] + line_coeff[1], '--g')
    box = p1.get_position()
    p1.set_position([box.x0, box.y0 + 0.02, box.width * 0.9, box.height])
    p1.set_ylim(np.min(obj.synflux) - 3, np.max(obj.synflux) + 3)
    p1.vlines(x=em_lines * (1 + z_backgal), ymin=-100, ymax=100, colors='g',
              linestyles='dashed')
    p1.legend(loc='upper right', bbox_to_anchor=(1.2, 1), ncol=1, prop=fontP)
    p1.set_xlim(3500, 10500)
    plt.ylabel('Flux [$10^{-17} erg\, s^{-1} cm^{-2} \AA^{-1}]$')

    # [OII] 3727 detail panel
    p2 = plt.subplot(gs[1, :1])
    p2.vlines(x=em_lines * (1 + z_backgal), ymin=-100, ymax=100, colors='g',
              linestyles='dashed')
    loc_flux = obj.flux[obj.wave2bin((1 + z_backgal) * (3727 - 10)):
                        obj.wave2bin((1 + z_backgal) * (3727 + 10))]
    p2.plot(obj.wave[obj.wave2bin((1 + z_backgal) * (3727 - 10)):
                     obj.wave2bin((1 + z_backgal) * (3727 + 10))],
            loc_flux, 'k', label='OII', drawstyle='steps-mid')
    p2.plot(obj.wave[obj.wave2bin((1 + z_backgal) * (3727 - 10)):
                     obj.wave2bin((1 + z_backgal) * (3727 + 10))],
            obj.synflux[obj.wave2bin((1 + z_backgal) * (3727 - 10)):
                        obj.wave2bin((1 + z_backgal) * (3727 + 10))],
            'r', label='OII', drawstyle='steps-mid')
    if len(loc_flux) > 0:
        p2.set_ylim(np.min(loc_flux) - 1, np.max(loc_flux) + 1)
    plt.title('[OII] 3727')
    p2.set_xlim((1 + z_backgal) * (3727 - 10), (1 + z_backgal) * (3727 + 10))
    x1 = int((1 + z_backgal) * 3727)
    plt.xticks([x1 - 15, x1, x1 + 15])
    plt.ylabel('Flux [$10^{-17} erg\, s^{-1} cm^{-2} \AA^{-1}]$')

    # Hbeta / [OIII] detail panel
    # If Ha is below 9500 A, show it
    if obj.z > 0.44:
        p3 = plt.subplot(gs[1, 1:4])
    else:
        p3 = plt.subplot(gs[1, 1:3])
    p3.vlines(x=em_lines * (1 + z_backgal), ymin=-100, ymax=100, colors='g',
              linestyles='dashed')
    loc_flux = obj.flux[obj.wave2bin((1 + z_backgal) * (4861 - 10)):
                        obj.wave2bin((1 + z_backgal) * (5007 + 10))]
    p3.plot(obj.wave[obj.wave2bin((1 + z_backgal) * (4861 - 10)):
                     obj.wave2bin((1 + z_backgal) * (5007 + 10))],
            loc_flux, 'k', label='OIII, Hb', drawstyle='steps-mid')
    p3.plot(obj.wave[obj.wave2bin((1 + z_backgal) * (4861 - 10)):
                     obj.wave2bin((1 + z_backgal) * (5007 + 10))],
            obj.synflux[obj.wave2bin((1 + z_backgal) * (4861 - 10)):
                        obj.wave2bin((1 + z_backgal) * (5007 + 10))],
            'r', label='OIII, Hb', drawstyle='steps-mid')
    if len(loc_flux) > 0:
        p3.set_ylim(np.min(loc_flux) - 1, np.max(loc_flux) + 1)
    plt.title(r'H$\beta$,[OIII] 4959, [OIII] 5007')
    plt.xlabel(r'Observed wavelength [$\AA$]')
    p3.set_xlim((1 + z_backgal) * (4861 - 10), (1 + z_backgal) * (5007 + 10))
    x1 = int((1 + z_backgal) * 4862 / 10.) * 10
    if x1 < 7600:
        plt.xticks([x1, x1 + 50, x1 + 100, x1 + 150, x1 + 200])
    else:
        plt.xticks([x1, x1 + 50, x1 + 100, x1 + 150, x1 + 200, x1 + 250])
    box = p3.get_position()
    p3.set_position([box.x0 + 0.02, box.y0, box.width * 0.9, box.height])

    # Halpha detail panel (only when it falls on the detector)
    if obj.z < 0.44:
        p4 = plt.subplot(gs[1, 3:4])
        p4.vlines(x=em_lines * (1 + z_backgal), ymin=-100, ymax=100,
                  colors='g', linestyles='dashed')
        loc_flux = obj.flux[obj.wave2bin((1 + z_backgal) * (6562 - 10)):
                            obj.wave2bin((1 + z_backgal) * (6562 + 10))]
        p4.plot(obj.wave[obj.wave2bin((1 + z_backgal) * (6562 - 10)):
                         obj.wave2bin((1 + z_backgal) * (6562 + 10))],
                loc_flux, 'k', label='Ha', drawstyle='steps-mid')
        p4.plot(obj.wave[obj.wave2bin((1 + z_backgal) * (6562 - 10)):
                         obj.wave2bin((1 + z_backgal) * (6562 + 10))],
                obj.synflux[obj.wave2bin((1 + z_backgal) * (6562 - 10)):
                            obj.wave2bin((1 + z_backgal) * (6562 + 10))],
                'r', label='Ha', drawstyle='steps-mid')
        if len(loc_flux) > 0:
            p4.set_ylim(np.min(loc_flux) - 1, np.max(loc_flux) + 1)
        plt.title(r'H$\alpha$')
        p4.set_xlim((1 + z_backgal) * (6562 - 10),
                    (1 + z_backgal) * (6562 + 10))
        x1 = int((1 + z_backgal) * 6562)
        if x1 < 9900:
            plt.xticks([x1 - 10, x1, x1 + 10],
                       [str(x1 - 10), str(x1), str(x1 + 10)])
        else:
            plt.xticks([x1 - 10, x1, x1 + 10],
                       [str(x1 - 10), '', str(x1 + 10)])

    plt.savefig(savedir + '/plots/' + SDSSname(obj.RA, obj.DEC) + '-' +
                str(obj.plate) + '-' + str(obj.mjd) + '-' + str(obj.fiberid) +
                '-' + str(n) + '.png')
    plt.close()

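# Note: every snippet in this collection calls make_sure_path_exists before
# writing output, but the helper itself is never shown. A minimal sketch,
# assuming it follows the common os.makedirs idiom of ignoring an
# "already exists" error (the individual projects' implementations may differ):
import errno
import os


def make_sure_path_exists(path):
    """Create `path` (including parent directories) if it does not exist."""
    try:
        os.makedirs(path)
    except OSError as exception:
        if exception.errno != errno.EEXIST:
            raise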