Example #1
0
def test_download_midi():
    """Exercise one end-to-end MIDI download with a PhantomJS driver.

    The track is fixed to composer 2013 / work 31394 / page 1 / track 1.
    ``credential`` is not assigned in this function; it is looked up as a
    global name.  The driver is closed even when a step raises.
    """
    track_spec = '2013,31394,1,1'
    browser = utils.start_driver('phantomjs', verbose=True)

    try:
        session.login(browser, credential)

        ids = [int(token) for token in track_spec.split(',')]
        composer_id, work_id, page_id, track_id = ids
        target_dir = 'midi/{}'.format(composer_id)

        task = download.Download(
            browser, composer_id, work_id, page_id, track_id)
        task.order()
        utils.wait(3)

        # cleanup() is only attempted when pickup() returned a file name
        saved_as = task.pickup(target_dir)
        cleaned = task.cleanup() if saved_as else False
        if cleaned:
            utils.print_message(
                'successfully downloaded {}'.format(task.track.title))
            utils.print_message('output file: {}'.format(saved_as))

        utils.wait(3)
        session.logout(browser)
    finally:
        utils.close_driver(browser, verbose=True)
Example #2
0
def test_login():
    """Log in, query the login state verbosely, then log out.

    Runs against a PhantomJS driver with a 5-unit ``utils.wait`` after the
    login and after the status check.  ``credential`` is looked up as a
    global name.  The driver is closed even when a step raises.
    """
    browser = utils.start_driver('phantomjs', verbose=True)
    try:
        session.login(browser, credential)
        utils.wait(5)

        session.is_login(browser, verbose=True)
        utils.wait(5)

        session.logout(browser)
    finally:
        utils.close_driver(browser, verbose=True)
Example #3
0
def test_get_composer_data():
    """Collect the works of composer ``2062`` via a PhantomJS driver.

    The result is handed to ``Composer.format_json`` with ``2062.json`` as
    the output file name.  The driver is closed even when a step raises.
    """
    browser = utils.start_driver('phantomjs', verbose=True)
    try:
        target = composer.Composer(browser, '2062')
        target.get_all_works()
        target.format_json(fname_out='2062.json')
    finally:
        utils.close_driver(browser, verbose=True)
Example #4
0
def get_all_composers(out_json_fname, out_list_fname):
    """Fetch the composer index and write it as JSON plus a plain ID list.

    Args:
        out_json_fname: Path passed to ``utils.save_json`` for the full
            composer data returned by ``composers.get_all_composers``.
        out_list_fname: Path of a text file that receives one composer ID
            per line, sorted numerically.

    The PhantomJS driver is closed even when a step raises.
    """
    driver = utils.start_driver('phantomjs', verbose=True)

    try:
        composer_json = composers.get_all_composers(driver)
        utils.save_json(composer_json, out_json_fname)

        # The ID is the last URL path component with '.html' removed.
        # The loop variable is deliberately not called `composer`, so it
        # cannot be confused with the `composer` module used elsewhere.
        composer_id_list = sorted(
            int(entry['url'].split('/')[-1].replace('.html', ''))
            for entry in composer_json)

        # write composer id list; `with` closes the file even if a write fails
        with open(out_list_fname, 'w') as fout:
            utils.print_message('wrinting composer ID list ' + out_list_fname)
            for composer_id in composer_id_list:
                print(composer_id, file=fout)

    finally:
        utils.close_driver(driver, verbose=True)
Example #5
0
def get_composer_works(composers_list_fname, skip_exist=True):
    """Extract the works of every composer listed in a file.

    Args:
        composers_list_fname: Text file with one composer ID per line.
            Blank lines are ignored.
        skip_exist: When true, composers whose output JSON
            (``<root_dir>/data/composer/<id>.json``) already exists are
            skipped.

    Each composer is attempted up to 10 times, waiting between attempts.
    A composer that still fails is left without an output file and the
    loop moves on to the next one.
    """
    # start virtual display
    display = Display(visible=0, size=(1024, 768))
    display.start()

    try:
        # Started inside the outer try so the display is stopped even when
        # the driver cannot be created.
        driver = utils.start_driver('chrome', verbose=True)

        try:
            with open(composers_list_fname) as id_file:
                for line in id_file:
                    composer_id = line.rstrip()
                    if not composer_id:
                        # a blank line would otherwise be scraped as ID ''
                        continue

                    out_json = '{}/data/composer/{}.json'.format(
                        root_dir, composer_id)
                    if skip_exist and os.path.isfile(out_json):
                        continue

                    for nretry in range(10):
                        try:
                            utils.print_message(
                                'extract works of composer {}'.format(
                                    composer_id))
                            c = composer.Composer(driver, composer_id)
                            c.get_all_works()
                            c.format_json(fname_out=out_json)
                            utils.print_message('\n')
                            break

                        # `Exception` rather than a bare except, so that
                        # Ctrl-C / SystemExit still abort the run instead
                        # of being retried.
                        except Exception:
                            utils.print_message(
                                '*ERROR* failed to extract works of '
                                'composer {} (#retry={})'.format(
                                    composer_id, nretry))
                            utils.wait(3)

        finally:
            utils.close_driver(driver, verbose=True)

    finally:
        # runs even if closing the driver raised
        display.stop()
Example #6
0
def check_composer_ntrack(composers_list_fname, online_mode):
    """Report composers whose stored track count looks inconsistent.

    For each composer ID in the list file, the tracks in
    ``<root_dir>/data/composer/<id>.json`` are counted by walking
    ``work_list`` -> ``page_list`` -> ``track_list``.

    Args:
        composers_list_fname: Text file with one composer ID per line.
            Blank lines are ignored.
        online_mode: When false, the count is compared with the JSON's own
            ``ntrack`` field.  When true, it is compared with the count
            scraped from the composer's page on the site, using a
            PhantomJS driver.

    The number of mismatching composers and their IDs are printed.  In
    online mode a composer whose page could not be read after 10 attempts
    is compared against an online count of 0.
    """
    host = 'https://www.classicalarchives.com'
    composer_dir = '{}/data/composer'.format(root_dir)

    mismatch_list = []
    # None marks "no driver to close" for the cleanup below.
    driver = None
    if online_mode:
        driver = utils.start_driver('phantomjs', verbose=True)

    try:
        with open(composers_list_fname) as id_file:
            for line in id_file:
                composer_id = line.rstrip()
                if not composer_id:
                    continue

                composer_json = '{}/{}.json'.format(composer_dir, composer_id)
                # Not named `composer`, to avoid confusion with the
                # `composer` module used by other functions in this file.
                composer_data = utils.load_json(composer_json, verbose=False)

                ntrack = sum(
                    1
                    for work in composer_data['work_list']
                    for page in work['page_list']
                    for _ in page['track_list'])

                if not online_mode:
                    if composer_data['ntrack'] != ntrack:
                        mismatch_list.append(composer_id)
                    continue

                # online mode
                ntrack_online = 0
                for nretry in range(10):
                    try:
                        utils.open_url(
                            driver,
                            host + '/midi/composer/{}.html'.format(
                                composer_id),
                            reopen=True)
                        counts_text = driver.find_element_by_xpath(
                            '//div[@id="wMidi"]//li[@class="counts"]').text
                        # second whitespace-separated token, with thousands
                        # separators removed
                        ntrack_online = int(
                            counts_text.split()[1].replace(',', ''))
                        break
                    # `Exception` rather than a bare except, so that Ctrl-C
                    # / SystemExit still abort the run.
                    except Exception:
                        utils.print_message(
                            '*ERROR* failed to extract #track for '
                            'composer {} (#retry={})'.format(
                                composer_id, nretry))
                        utils.wait(3)

                print('composer={}, #track online={} local={}'.format(
                    composer_id, ntrack_online, ntrack))

                if ntrack_online != ntrack:
                    mismatch_list.append(composer_id)

    finally:
        if driver is not None:
            utils.close_driver(driver, verbose=True)

    print('Found {} composers with inconsistent #tracks ({} mode)\n'.format(
        len(mismatch_list), 'online' if online_mode else 'local'))

    for composer_id in mismatch_list:
        print(composer_id)
Example #7
0
def get_composer_midis(midi_list_fname):
    """Download the MIDI tracks listed in a file, up to a per-run cap.

    Args:
        midi_list_fname: Text file whose lines have the form
            ``composer_id,work_id,page_id,track_id`` (all integers).
            Blank lines are ignored.

    Files go to ``<root_dir>/data/midi/<composer_id>/``.  A track is
    skipped when a file whose name starts with
    ``composer_<c>.work_<w>.page_<p>.track_<t>.`` already exists there.
    The run stops at the first track that cannot be picked up or cleaned
    up, or once ``ntrack_max`` tracks have been downloaded.

    Logout, driver shutdown and display shutdown are each attempted even
    when an earlier step raised.
    """
    utils.print_message(
        '---------- Start Time: {} ----------'.format(time.ctime()))

    # start virtual display
    display = Display(visible=0, size=(1024, 768))
    display.start()

    try:
        credential = '{}/data/cma.credential.json'.format(root_dir)
        # Started inside the outer try so the display is stopped even when
        # the driver cannot be created.
        driver = utils.start_driver('chrome', verbose=True)

        try:
            session.login(driver, credential)
            utils.wait(3)
            utils.print_message(' ')

            ntrack = 0
            # NOTE(review): the cap is 103 while the original comment and
            # the progress message say 100 - confirm the intended limit.
            ntrack_max = 103   # 100 midis/day

            with open(midi_list_fname) as midi_list:
                for line in midi_list:
                    line = line.rstrip()
                    if not line:
                        # a blank line would make int('') raise
                        continue

                    composer_id, work_id, page_id, track_id = map(
                        int, line.split(','))

                    out_dir = '{}/data/midi/{}'.format(root_dir, composer_id)
                    os.makedirs(out_dir, exist_ok=True)
                    fname_prefix = (
                        'composer_{}.work_{}.page_{}.track_{}'.format(
                            composer_id, work_id, page_id, track_id))

                    # check if output midi already exists
                    if any(local_fname.startswith(fname_prefix + '.')
                           for local_fname in os.listdir(out_dir)):
                        continue

                    # create a download job
                    job = download.Download(
                        driver, composer_id, work_id, page_id, track_id)
                    job.order()
                    utils.wait(5)

                    # check if a download is successfully created
                    fname = job.pickup(out_dir)
                    success = job.cleanup() if fname else False
                    if not success:
                        break

                    utils.print_message(
                        'successfully downloaded {} ({}/100)'
                        .format(job.track.title, ntrack + 1))
                    utils.print_message('output file: {}'.format(fname))
                    utils.print_message(' ')
                    utils.wait(3)

                    ntrack += 1

                    # if daily limit is reached
                    if ntrack == ntrack_max:
                        break

        finally:
            try:
                utils.print_message(' ')
                session.logout(driver)
                utils.wait(3)
            finally:
                # the driver must be closed even if logging out failed
                utils.close_driver(driver, verbose=True)

    finally:
        display.stop()

    utils.print_message(
        '---------- Finish Time: {} ----------'.format(time.ctime()))