Example #1
0
def analyse_all_genomes(genomes, dbpath, tmp_path, nbn, soft, logger, quiet=False):
    """

    Parameters
    ----------
    genomes : dict
        {genome: spegenus.date}
    dbpath : str
        path to folder containing genomes
    tmp_path : str
        path to put out files
    nbn : int
        minimum number of 'N' required to cut into a new contig
    soft : str
        software used (prokka, prodigal, or None if called by the prepare module)
    logger : logging.Logger
        logger object used to write log information. It is passed explicitly because this
        function can also be called from the prepare module, where the sub-logger name differs
    quiet : bool
        True if nothing must be written to stdout/stderr, False otherwise

    Returns
    -------
    int
        0 on success. The 'genomes' dict is updated in place to
        {genome: [spegenus.date, orig_name, path_to_seq_to_annotate, size, nbcont, l90]},
        and genomes whose analysis failed are removed from it.

    """
    cut = nbn > 0
    pat = None  # pattern of 'N' stretches used to cut sequences (None if no cutting)
    if cut:
        pat = 'N' * nbn + "+"
    nbgen = len(genomes)
    bar = None
    curnum = None
    if cut:
        logger.info(("Cutting genomes at each time there are at least {} 'N' in a row, "
                     "and then, calculating genome size, number of contigs and L90.").format(nbn))
    else:
        logger.info("Calculating genome size, number of contigs, L90")
    if not quiet:
        # Create progressbar
        widgets = ['Analysis: ', progressbar.Bar(marker='█', left='', right=''),
                   ' ', progressbar.Counter(), "/{}".format(nbgen), ' (',
                   progressbar.Percentage(), ') - ', progressbar.Timer(), ' - ',
                   progressbar.ETA()
                   ]
        bar = progressbar.ProgressBar(widgets=widgets, max_value=nbgen, term_width=79).start()
        curnum = 1
    toremove = []
    # Analyse genomes 1 by 1
    for genome, name in genomes.items():
        # If not quiet option, show progress bar
        if not quiet:
            bar.update(curnum)
            curnum += 1
        # Analyse the genome and check everything went well.
        # A UnicodeDecodeError means the file is binary, not fasta.
        try:
            res = analyse_genome(genome, dbpath, tmp_path, cut, pat, genomes, soft, logger=logger)
        except UnicodeDecodeError:
            logger.warning(f"'{genome}' does not seem to be a fasta file. It will be ignored.")
            res = False
        # Problem while analysing genome -> genome ignored
        if not res:
            toremove.append(genome)
    # If there are some genomes to remove (analysis failed), remove them from genomes dict.
    if toremove:
        for gen in toremove:
            del genomes[gen]
    if not genomes:
        logger.error(f"No genome was found in the database folder {dbpath}. See logfile "
                     "for more information.")
        sys.exit(1)
    if not quiet:
        bar.finish()
    return 0
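# --- Illustrative sketch (not part of the original module) ---
# analyse_genome() is not shown above; the sketch below only illustrates, under that
# assumption, how a pattern built as 'N' * nbn + "+" can cut a sequence at runs of at
# least nbn consecutive 'N'.
import re

nbn = 3
pat = 'N' * nbn + "+"                      # "NNN+": three or more 'N' in a row
seq = "ACGTNNNNNCCGTANNGGTA"
contigs = [c for c in re.split(pat, seq) if c]
print(contigs)                             # ['ACGT', 'CCGTANNGGTA'] (the 2-N run is kept)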
Example #2
0
def main():
    browser = create_client()

    conn = sqlite3.connect('links.db')
    conn.row_factory = sqlite3.Row
    videos_info = conn.execute(
        'SELECT * FROM videos WHERE downloaded = 0 AND download_forbidden IS NULL'
    ).fetchall()
    widgets = [
        progressbar.Percentage(), ' ',
        progressbar.Counter(), ' ',
        progressbar.Bar(), ' ',
        progressbar.FileTransferSpeed()
    ]
    pbar = progressbar.ProgressBar(widgets=widgets,
                                   max_value=len(videos_info)).start()

    for i, video_info in enumerate(videos_info):
        pbar.update(i)
        video_info = dict(video_info)
        video_id = video_info['video_id']
        browser.visit(video_info['video_url'])

        while browser.is_element_present_by_css(
                '.recaptchaContent'):  # sometimes wild captcha appears
            print("CAPTCHA NEEDED")
            sleep(60)

        if browser.is_element_present_by_css('.removed'):
            # video has been removed
            print('video has been removed\n')
            with conn:
                conn.execute(
                    f'UPDATE videos SET download_forbidden = 1 where video_id = "{video_id}"'
                )
            continue

        if not browser.is_element_present_by_css(
                '.premiumIconTitleOnVideo:visible'
        ) and not browser.is_element_present_by_css('#videoTitle'):
            # video page is broken and it is not a premium video
            print('video is somehow broken and not premium\n')
            with conn:
                conn.execute(
                    f'UPDATE videos SET download_forbidden = 1 where video_id = "{video_id}"'
                )
            continue

        video_title = browser.find_by_css('#videoTitle').text  # type: str
        # strip characters that are not allowed in Windows file names
        for forbidden_char in ':?*"/\\':
            video_title = video_title.replace(forbidden_char, '')
        browser.find_by_id('player').click()  # pausing video
        browser.find_by_tag('body')._element.send_keys('M')  # muting video

        file_name = f'videos/{video_id}-{video_title}.mp4'
        if osp.exists(file_name):
            with conn:
                conn.execute(
                    f'UPDATE videos SET downloaded = 1 where video_id = "{video_id}"'
                )
            continue

        if browser.is_element_present_by_css(
                '.tab-menu-item.js-paidDownload[data-tab="download-tab"]'):
            # download requires payment
            print('video download is paid\n')
            with conn:
                conn.execute(
                    f'UPDATE videos SET download_forbidden = 1 where video_id = "{video_id}"'
                )
            continue

        download_tab_button_sel = '.tab-menu-item[data-tab="download-tab"]'
        vr_tab_button_sel = '.tab-menu-item[data-tab="vr-tab"]'
        if not browser.is_element_present_by_css(download_tab_button_sel) \
                and browser.is_element_present_by_css(vr_tab_button_sel):
            # VR-only video, no download tab
            print('video is vr, no download\n')
            with conn:
                conn.execute(
                    f'UPDATE videos SET download_forbidden = 1 where video_id = "{video_id}"'
                )
            continue

        click_download_tab(browser, download_tab_button_sel)

        if is_download_forbidden(browser, conn, video_id):
            continue

        download_link = get_download_link(browser)
        # must have headers here, otherwise it behaves as an API and does not serve the video
        for _ in range(5):
            try:
                request.urlretrieve(download_link, file_name)
                break
            except URLError:
                print('connection failed, trying again\n')

        print(file_name, 'downloaded\n')
        with conn:
            conn.execute(
                f'UPDATE videos SET downloaded = 1 where video_id = "{video_id}"'
            )

    pbar.finish()
    print('done')
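# --- Hedged side note (not from the original script) ---
# The UPDATE statements above interpolate video_id with f-strings. sqlite3 also
# supports bound parameters, which avoids quoting problems if an id ever contains a
# double quote. Minimal sketch, assuming the same 'links.db' schema:
import sqlite3

conn = sqlite3.connect('links.db')
with conn:
    conn.execute('UPDATE videos SET downloaded = 1 WHERE video_id = ?',
                 ('some_video_id',))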
Example #3
0
def train(maddpg, env, n_episodes=1000, save_every=50):
    """Training loop helper for running the environment using the MADDPG algorithm.
    Params
    ======
        maddpg (MADDPG): instance of MADDPG wrapper class
        env (UnityEnvironment): instance of Unity environment for training
        n_episodes (int): number of episodes to train for
        save_every (int): frequency to save model weights
    """
    widget = [
        "Episode: ",
        pb.Counter(), '/',
        str(n_episodes), ' ',
        pb.Percentage(), ' ',
        pb.ETA(), ' ',
        pb.Bar(marker=pb.RotatingMarker()), ' ', 'Rolling Average: ',
        pb.FormatLabel('')
    ]
    timer = pb.ProgressBar(widgets=widget, maxval=n_episodes).start()

    solved = False
    scores_total = []
    scores_deque = deque(maxlen=100)
    rolling_score_averages = []
    last_best_score = 0.0

    # Environment information
    brain_name = env.brain_names[0]

    for i_episode in range(1, n_episodes + 1):
        current_average = 0.0 if i_episode == 1 else rolling_score_averages[-1]
        widget[12] = pb.FormatLabel(str(current_average)[:6])
        timer.update(i_episode)

        env_info = env.reset(train_mode=True)[brain_name]
        states = env_info.vector_observations[:, -STATE_SIZE:]
        scores = np.zeros(NUM_AGENTS)
        maddpg.reset()

        while True:
            actions = maddpg.act(states)

            env_info = env.step(actions)[brain_name]
            next_states = env_info.vector_observations[:, -STATE_SIZE:]
            rewards = env_info.rewards
            dones = env_info.local_done

            maddpg.step(states, actions, rewards, next_states, dones)

            scores += rewards
            states = next_states

            if np.any(dones):
                break

        max_episode_score = np.max(scores)

        scores_deque.append(max_episode_score)
        scores_total.append(max_episode_score)

        average_score = np.mean(scores_deque)
        rolling_score_averages.append(average_score)

        if average_score >= 0.5 and not solved:
            print(
                '\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'
                .format(i_episode, average_score))
            solved = True
            maddpg.save_model()
            last_best_score = average_score

        if i_episode % save_every == 0 and solved:
            # Only save these weights if they are better than the ones previously saved
            if average_score > last_best_score:
                last_best_score = average_score
                maddpg.save_model()

    return scores_total, rolling_score_averages
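# --- Minimal standalone sketch (assumed usage, not part of the training code) ---
# It isolates the widget trick used above: replacing the trailing FormatLabel in the
# widgets list so the bar displays a value that changes every iteration.
import time
import progressbar as pb

n = 20
widgets = ['Step ', pb.Counter(), '/', str(n), ' avg: ', pb.FormatLabel('')]
bar = pb.ProgressBar(widgets=widgets, max_value=n).start()
running_avg = 0.0
for i in range(1, n + 1):
    running_avg += (i - running_avg) / i   # toy rolling average
    widgets[-1] = pb.FormatLabel('%.3f' % running_avg)
    bar.update(i)
    time.sleep(0.05)
bar.finish()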
Example #4
0
def excel(size_max=4, size_min=3):
    score = Score(size_max=size_max, size_min=size_min)
    wb = openpyxl.load_workbook(filename=path_excel)
    ws = wb.worksheets[0]
    excel_clear(ws)
    y = 2
    x = 4 * 0
    files = os.listdir(path)

    number = 0
    for file in files:
        if not os.path.isdir(path + '/' + file):
            number += 1
    bar = progressbar.ProgressBar(
        max_value=number * 2,
        widgets=[
            'Auto-scoring: ',
            progressbar.Bar('>'),
            ' ',
            progressbar.Counter(format='%(value)02d/%(max_value)d'),
        ],
    )

    # for file in files:
    #     if not os.path.isdir(path + '/' + file):
    #         this = Get.get_data(path + '/' + file)
    #         res = score.score(this.result1)
    #         if file.split('_')[-3].find('告') == -1:
    #             ws.cell(row=y, column=1).value = file.split('_')[-3] + '_' + file.split('_')[-2]
    #         else:
    #             ws.cell(row=y, column=1).value = file.split('_')[-2]
    #         ws.cell(row=y, column=2+x).value = res[1][0]
    #         ws.cell(row=y, column=3+x).value = res[1][1]
    #         ws.cell(row=y, column=4+x).value = res[1][2]
    #         bar.update(y-2)
    #         y += 1

    y = 2
    x = 4 * 1
    for idd in range(1, 45):
        with DB() as db:
            sql = "SELECT name FROM good_id"
            res = db.read(sql)
            for name in res:
                result1 = get_data(name['name'])
                res = score.score(result1)
                if len(res[0]) == 0:
                    ws.cell(row=y, column=2 + x).value = 0
                    ws.cell(row=y, column=3 + x).value = 0
                    ws.cell(row=y, column=4 + x).value = 0
                else:
                    ws.cell(row=y, column=2 + x).value = res[1][0]
                    ws.cell(row=y, column=3 + x).value = res[1][1]
                    ws.cell(row=y, column=4 + x).value = res[1][2]
                # bar.update(y-2+number)
                y += 1
    try:
        wb.save(filename=path_excel)
    except PermissionError:
        print()
        print('While saving {}: permission denied or the file is already open!!!'.format(path_excel))
        wb.close()
        exit()
    print('Score table has been created!')
def main():
    parser = argparse.ArgumentParser(description='Convert filenames.')
    parser.add_argument('--run_directory', '-r', metavar='MY_DATA_DIR', type=str,
                        help='root of dataset files')

    args = parser.parse_args()
    run_directory = args.run_directory

    ini_file = "gta-postprocessing.ini"
    visualization.multi_page = False
    visualization.ini_file = ini_file
    visualization.use_cache = False

    conn = visualization.get_connection_pooled()

    CONFIG = ConfigParser()
    CONFIG.read(ini_file)

    sample_file_path = osp.join(run_directory, '0')
    if not osp.exists(sample_file_path):
        print('path to dataset images cannot be found, tried {}'.format(sample_file_path))
        return

    # first, find the run_id by checking the name of the first image
    cur: cursor = conn.cursor()
    json_name = glob.glob(osp.join(sample_file_path, '*.json'))[0]
    with open(json_name) as f:
        data = json.load(f)
    imagepath = data['imagepath']

    cur.execute("""SELECT run_id
          FROM snapshots
          WHERE imagepath = %(imagepath)s
        """, {'imagepath': imagepath})

    run_id = cur.fetchone()['run_id']

    # then get all scene ids, sorted by timestamp
    cur = conn.cursor()

    cur.execute("""SELECT scene_id, min(timestamp) 
            FROM snapshots
            WHERE run_id = %(run_id)s
            GROUP BY scene_id
          ORDER BY min(timestamp) ASC
        """, {'run_id': run_id})

    old_scenes = {}
    new_scenes = {}
    for i, row in enumerate(cur):
        old_scenes[str(i)] = row['scene_id']
        new_scenes[row['scene_id']] = f'{i:06}'

    print('everything loaded, starting renaming')
    widgets = [progressbar.Percentage(), ' ', progressbar.Counter(), ' ', progressbar.Bar(), ' ',
               progressbar.FileTransferSpeed()]

    pbar = progressbar.ProgressBar(widgets=widgets, maxval=len(old_scenes) * 6 * 4).start()
    counter = 0

    # I know directory structure is root/cam_index/files
    for camera_dir in os.listdir(run_directory):
        # camera directories have integer names; anything else is skipped
        if not camera_dir.isdigit():
            continue

        # handle each of the 4 file types in a separate loop, for simplicity
        for suffix in ['.jpg', '-depth.png', '-stencil.png', '.json']:
            for filename in get_files(run_directory, camera_dir, suffix):
                basename = get_base_name(filename)
                base_suffix = suffix.split('.')[0]
                if len(base_suffix) > 1:
                    basename = basename[:-len(base_suffix)]
                counter += 1
                pbar.update(counter)
                old_name = osp.join(run_directory, camera_dir, basename+suffix)
                new_name = osp.join(run_directory, camera_dir, new_scenes[old_scenes[basename]]+suffix)
                os.rename(old_name, new_name)

    pbar.finish()
    print('done')
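# --- Small illustrative sketch (toy values, not from the original script) ---
# It shows how the old_scenes / new_scenes pair above maps an existing basename to a
# zero-padded, time-ordered name; the scene ids below are made up.
scene_ids_sorted_by_time = ['a91f', '07bc', 'c3d2']
old_scenes = {}   # position in time order (as str) -> scene_id
new_scenes = {}   # scene_id -> zero-padded new basename
for i, scene_id in enumerate(scene_ids_sorted_by_time):
    old_scenes[str(i)] = scene_id
    new_scenes[scene_id] = f'{i:06}'
print(new_scenes[old_scenes['1']])   # '000001': the file formerly named "1" gets this basename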
Example #6
0
    def start(self) -> None:
        self.random_state = self._set_random_state(
            self.config.get('hyperopt_random_state', None))
        logger.info(f"Using optimizer random state: {self.random_state}")
        self.hyperopt_table_header = -1
        # Initialize spaces ...
        self.init_spaces()

        self.prepare_hyperopt_data()

        # We don't need exchange instance anymore while running hyperopt
        self.backtesting.exchange.close()
        self.backtesting.exchange._api = None  # type: ignore
        self.backtesting.exchange._api_async = None  # type: ignore
        # self.backtesting.exchange = None  # type: ignore
        self.backtesting.pairlists = None  # type: ignore

        cpus = cpu_count()
        logger.info(f"Found {cpus} CPU cores. Let's make them scream!")
        config_jobs = self.config.get('hyperopt_jobs', -1)
        logger.info(f'Number of parallel jobs set as: {config_jobs}')

        self.opt = self.get_optimizer(self.dimensions, config_jobs)

        if self.print_colorized:
            colorama_init(autoreset=True)

        try:
            with Parallel(n_jobs=config_jobs) as parallel:
                jobs = parallel._effective_n_jobs()
                logger.info(
                    f'Effective number of parallel workers used: {jobs}')

                # Define progressbar
                if self.print_colorized:
                    widgets = [
                        ' [Epoch ',
                        progressbar.Counter(),
                        ' of ',
                        str(self.total_epochs),
                        ' (',
                        progressbar.Percentage(),
                        ')] ',
                        progressbar.Bar(marker=progressbar.AnimatedMarker(
                            fill='\N{FULL BLOCK}',
                            fill_wrap=Fore.GREEN + '{}' + Fore.RESET,
                            marker_wrap=Style.BRIGHT + '{}' + Style.RESET_ALL,
                        )),
                        ' [',
                        progressbar.ETA(),
                        ', ',
                        progressbar.Timer(),
                        ']',
                    ]
                else:
                    widgets = [
                        ' [Epoch ',
                        progressbar.Counter(),
                        ' of ',
                        str(self.total_epochs),
                        ' (',
                        progressbar.Percentage(),
                        ')] ',
                        progressbar.Bar(marker=progressbar.AnimatedMarker(
                            fill='\N{FULL BLOCK}', )),
                        ' [',
                        progressbar.ETA(),
                        ', ',
                        progressbar.Timer(),
                        ']',
                    ]
                with progressbar.ProgressBar(max_value=self.total_epochs,
                                             redirect_stdout=False,
                                             redirect_stderr=False,
                                             widgets=widgets) as pbar:
                    EVALS = ceil(self.total_epochs / jobs)
                    for i in range(EVALS):
                        # Correct the number of epochs to be processed for the last
                        # iteration (should not exceed self.total_epochs in total)
                        n_rest = (i + 1) * jobs - self.total_epochs
                        current_jobs = jobs - n_rest if n_rest > 0 else jobs

                        asked = self.opt.ask(n_points=current_jobs)
                        f_val = self.run_optimizer_parallel(parallel, asked, i)
                        self.opt.tell(asked, [v['loss'] for v in f_val])

                        # Calculate progressbar outputs
                        for j, val in enumerate(f_val):
                            # Use human-friendly indexes here (starting from 1)
                            current = i * jobs + j + 1
                            val['current_epoch'] = current
                            val['is_initial_point'] = current <= INITIAL_POINTS

                            logger.debug(f"Optimizer epoch evaluated: {val}")

                            is_best = HyperoptTools.is_best_loss(
                                val, self.current_best_loss)
                            # This value is assigned here and not in the optimization method
                            # to keep proper order in the list of results. That's because
                            # evaluations can take different time. Here they are aligned in the
                            # order they will be shown to the user.
                            val['is_best'] = is_best
                            self.print_results(val)

                            if is_best:
                                self.current_best_loss = val['loss']
                                self.current_best_epoch = val

                            self._save_result(val)

                            pbar.update(current)

        except KeyboardInterrupt:
            print('User interrupted..')

        logger.info(
            f"{self.num_epochs_saved} {plural(self.num_epochs_saved, 'epoch')} "
            f"saved to '{self.results_file}'.")

        if self.current_best_epoch:
            if self.auto_hyperopt:
                HyperoptTools.try_export_params(
                    self.config, self.backtesting.strategy.get_strategy_name(),
                    self.current_best_epoch)

            HyperoptTools.show_epoch_details(self.current_best_epoch,
                                             self.total_epochs,
                                             self.print_json)
        else:
            # This is printed when Ctrl+C is pressed quickly, before first epochs have
            # a chance to be evaluated.
            print("No epochs evaluated yet, no best result.")
def run_annotation_all(genomes, threads, force, annot_folder, fgn, prodigal_only=False,
                       small=False, quiet=False):
    """
    For each genome in genomes, run prokka (or only prodigal) to annotate the genome.

    Parameters
    ----------
    genomes : dict
        {genome: [gembase_name, path_to_origfile, path_split_gembase, gsize, nbcont, L90]}
    threads : int
        max number of threads that can be used
    force : bool
        if False, do not override prokka/prodigal outdir and result dir if they exist. If\
        True, rerun prokka and override existing results, for all genomes.
    annot_folder : str
        folder where prokka/prodigal results must be written: for each genome,
        a directory <genome_name>-prokkaRes or <genome_name>-prodigalRes will be created
        in this folder, and all the results
        of prokka/prodigal for the genome will be written inside
    fgn : str
        name (key in genomes dict) of the first genome, which will be used for prodigal training
    prodigal_only : bool
        True if only prodigal must run, False if prokka must run
    small : bool
        True -> use -p meta option with prodigal. Do not use training
    quiet : bool
        True if nothing must be written to stderr/stdout, False otherwise

    Returns
    -------
    dict
        {genome: boolean} -> True if prokka/prodigal ran well, False otherwise.
    """

    # Update information according to annotation soft used and write message
    if prodigal_only:
        message = "Annotating all genomes with prodigal"
        run_annot = run_prodigal
        main_logger = logging.getLogger("annotate.prodigal")
    else:
        message = "Annotating all genomes with prokka"
        run_annot = run_prokka
        main_logger = logging.getLogger("annotate.prokka")
    main_logger.info(message)
    # Get total number of genomes to annotate, used to show annotation progress
    nbgen = len(genomes)
    bar = None
    # If user did not ask for quiet, create progressbar
    if not quiet:
        # Create progress bar
        widgets = ['Annotation: ', progressbar.Bar(marker='█', left='', right=''),
                   ' ', progressbar.Counter(), "/{}".format(nbgen), ' (',
                   progressbar.Percentage(), ') - ', progressbar.Timer(), ' - '
                  ]
        bar = progressbar.ProgressBar(widgets=widgets, max_value=nbgen,
                                      term_width=79).start()
    # Get resource availability:
    # - number of threads used by prokka/prodigal (cores_annot)
    # - how many genomes can be annotated at the same time (pool_size)
    # - prodigal does not run with several threads: with prodigal, always cores_annot == 1
    # and pool_size == threads
    gpath_train = ""  # by default, no training genome
    if prodigal_only:
        cores_annot = 1
        pool_size = threads
        # If prodigal, train on the first genome
        # fgn is key of genomes, genomes[fgn] = [_,_,annote_file,_,_,_]
        gtrain = genomes[fgn][2]
        # If problem, gpath_train will be empty, but this will be checked while
        # trying to run prodigal, because we also need to check that genomes are not simply
        # already annotated
        if not small:
            gpath_train = prodigal_train(gtrain, annot_folder)
        else:
            gpath_train = "small option"
    elif threads <= 3:
        # less than 3 threads: run prokka 1 by 1 with all threads
        cores_annot = threads
        pool_size = 1
    else:
        # use multiprocessing
        # if there are more threads than genomes, use as many threads as possible per genome
        if len(genomes) <= threads:
            cores_annot = int(threads / len(genomes))
        # otherwise, use 2 threads per genome (and threads/2 genomes at the same time)
        else:
            cores_annot = 2
        pool_size = int(threads / cores_annot)
    #  Create pool with a given size (=number of tasks to be launched in parallel)
    pool = multiprocessing.Pool(pool_size)
    # Create a Queue to put logs from processes, and handle them after from a single thread
    m = multiprocessing.Manager()
    q = m.Queue()
    # {genome: [gembase_name, path_to_origfile, path_toannotate_file, gsize, nbcont, L90]}
    # arguments: gpath, prok_folder, threads, name, force, nbcont, gpath_train (for prodigal), q
    arguments = [(genomes[g][2], annot_folder, cores_annot, genomes[g][0],
                  force, genomes[g][4], gpath_train, q)
                 for g in sorted(genomes)]
    try:
        # Start pool (run 'run_annot' on each set of arguments)
        final = pool.map_async(run_annot, arguments, chunksize=1)
        # Close pool: no more data will be put on this pool
        pool.close()
        # Listen for logs in processes
        lp = threading.Thread(target=utils.logger_thread, args=(q,))
        lp.start()
        if not quiet:
            while True:
                # Final is ready when all pool elements are done
                if final.ready():
                    break
                # If not done, get number of genomes left
                remaining = final._number_left
                # Add this to start progressbar with 0% instead of N/A%
                if remaining == nbgen:
                    bar.update(0.0000001)
                else:
                    # Update progress bar
                    bar.update(nbgen - remaining)
            # End progress bar
            bar.finish()
        pool.join()
        # Put None to tell 'q' that everything is finished. It can then be stopped and joined.
        q.put(None)
        # join lp (tell to stop once all log processes are done, which is the case here)
        lp.join()
        final = final.get()
    # # If user stops programm (ctrl+C), end it
    # except KeyboardInterrupt as ki:
    #     print("error")
    #     for worker in pool._pool:
    #         print(worker.is_alive())
    #     pool.join()
    #     print("closed")
    #     pool.terminate()
    #     print("--------------terminate ok----------------")
    #     lp.join()
    #     print("thread stopped")
    #     # run_event.clear()
    #     # lp.terminate()
    #     # print("--------------JOIN--------------")
    #     # pool.terminate()
    #     main_logger.error("Process killed by CTRL+C")
    #     return "coucou"
    # If an error occurs, terminate pool, write error and exit
    except Exception as excp:  # pragma: no cover
        pool.terminate()
        main_logger.error(excp)
        sys.exit(1)
    final = {genome: res for genome, res in zip(sorted(genomes), final)}
    return final
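# --- Minimal sketch (not part of the annotation module) ---
# It condenses the thread-allocation rule implemented above for prokka: with <= 3
# threads, genomes run one by one using all threads; otherwise either
# threads // nb_genomes threads per genome (more threads than genomes) or 2 threads
# per genome, with threads // cores genomes annotated in parallel.
def allocate(threads, nb_genomes):
    if threads <= 3:
        return threads, 1                  # (cores per genome, genomes in parallel)
    cores = threads // nb_genomes if nb_genomes <= threads else 2
    return cores, threads // cores

print(allocate(2, 10))   # (2, 1)
print(allocate(16, 4))   # (4, 4)
print(allocate(8, 20))   # (2, 4)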
def counter_and_timer():
    widgets = ['Processed: ', progressbar.Counter(),
               ' lines (', progressbar.Timer(), ')']
    bar = progressbar.ProgressBar(widgets=widgets)
    for i in bar((i for i in range(15))):
        sleep(0.1)
def collect_from_multinest_constrainer(d, N, seed, niter=10000, verbose=False):
    print('collecting', d, N, seed)
    sequence = numpy.loadtxt('mn_pyramid_%d_%d_%d_sequence' % (d, N, seed))
    nested_samples = numpy.loadtxt('mn_pyramid_%d_%d_%d_.txt' %
                                   (d, N, seed))[:niter * 10 + 2 * N, :]
    # one has Lmin, L, n, the other has weight, -2*L
    assert sequence.shape[1] - 3 == nested_samples.shape[1] - 2
    #	dtype=[('Lmin', float), ('L', float), ('d', float), ('n', int)])
    #assert niter == 2000, niter
    # some points should be ignored, because of ellipse overlap
    #  -- specifically, those where the sample was doubled
    skip_sequence = []
    # select those coordinates/L that are in nested_samples
    coordinates_set = set([tuple(c) for c in nested_samples[:, 2:]])
    coordinates = numpy.array(nested_samples[:, 2:])

    # compute distances between coordinates and sequence[:,4:]
    #d = scipy.spatial.distance.cdist(coordinates, sequence[:,4:], metric='chebyshev')
    #print (d[:15000] < 1e-5).sum()

    #nprev = 0
    pbar = progressbar.ProgressBar(widgets=[
        progressbar.Percentage(),
        progressbar.Counter('%5d'),
        progressbar.Bar(),
        progressbar.ETA()
    ])
    for i, row in enumerate(pbar(sequence)):
        Lmin, L, n = row[:3]
        n = int(n)
        uj = row[3:]
        #nprev += n

        chosen = tuple(uj) in coordinates_set
        #if not chosen:
        #	scale = numpy.abs(uj - 0.5).max()
        #	dist = scipy.spatial.distance.cdist([(uj - 0.5) / scale],
        #		(coordinates - 0.5) / scale, metric='chebyshev')
        #	chosen = (dist < 1e-5).any()
        #	if chosen:
        #		print (dist < 1e-5).sum(), dist
        #	mask = numpy.array([numpy.allclose(uj, c, rtol=1e-09, atol=1e-20) for c in coordinates])
        #	print mask.sum(), uj.shape, coordinates.shape
        #	chosen = mask.any()

        if chosen:  # or numpy.allclose(uj, coordinates, rtol=1e-09, atol=1e-20):
            #row[3] = nprev
            skip_sequence.append(row)
            #nprev = 0

    #sequence = skip_sequence
    #skip_sequence = []
    #lastrow = sequence[0]
    #for i, row in enumerate(sequence[1:]):
    #	if lastrow[0] < -1e300 or row[0] != lastrow[0]:
    #		skip_sequence.append(lastrow)
    #	else:
    #		print 'double   %d:' % i, lastrow
    #		print 'for      %d:' % (i+1), row
    #		# add number of samples required
    #	lastrow = row
    #double   559: [-0.99252442 -0.99226279  0.45990741  3.          0.46586668  0.27797759
    #  0.95256335  0.04009259  0.73110932  0.10233873  0.37383509]
    #for      560: [-0.99252442 -0.9900788   0.36895728  1.          0.59133446  0.69940084
    #  0.17985809  0.73270625  0.6301567   0.13104272  0.28685457]

    print('sequence shortened from %d to %d (%.3f%%) from %d samples' %
          (len(sequence), len(skip_sequence),
           len(skip_sequence) * 100. / len(sequence), len(coordinates)))
    assert len(coordinates) == len(skip_sequence)
    assert niter + 2 * N <= len(skip_sequence)
    live_points = {}
    total_samples = 0

    distances = []
    shrinkages = []
    # go through posterior chain of MultiNest. consider likelihood values
    # skip those in between, keep adding up n
    i = 0
    pbar = progressbar.ProgressBar(widgets=[
        progressbar.Percentage(),
        progressbar.Counter('%5d'),
        progressbar.Bar(),
        progressbar.ETA()
    ])
    for row in pbar(skip_sequence[:N + niter]):
        Lmin, L, n = row[:3]
        n = int(n)
        uj = row[3:]

        # initial fill-up
        if Lmin < -1e300:
            assert L not in live_points, L
            live_points[L] = [uj]
            continue
        total_samples += n

        #rsize    = (-Lmin)**100
        #rnewsize = (-L)**100
        #shrinkages.append(shrinkage(d, rsize, rnewsize, verbose=verbose))

        # previous point
        assert Lmin in live_points, Lmin
        ui = live_points[Lmin][-1]
        rsize = numpy.abs(ui - 0.5).max()
        #rsize = 0.5 - (-Lmin)**100
        # all live points
        points = []
        for p in live_points.values():
            points += p
        assert len(points) == N
        if verbose and i < 40:
            print('row:', len(points), Lmin, L, rsize, n, uj)

        dist = normalized_distance(d, points, rsize, uj, verbose=verbose)
        if verbose and i < 40:
            print('distance:', dist, rsize, -((0.5 - rsize)**0.01))
        # store distance
        distances.append(dist)

        # replace point
        live_points[Lmin].pop()
        if not live_points[Lmin]:
            del live_points[Lmin]
        live_points[L] = live_points.get(L, []) + [uj]

        # store shrinkage: use points after removing least likely point
        points = []
        for p in live_points.values():
            points += p
        rnewsize = numpy.abs(numpy.array(points) - 0.5).max()
        shrinkages.append(shrinkage(d, rsize, rnewsize, verbose=verbose))

        i = i + 1
        if i > niter:
            break
    return distances, shrinkages, total_samples, niter
def sample_from_constrainer(d,
                            N,
                            constrainer,
                            seed,
                            niter=10000,
                            verbose=False):
    numpy.random.seed(seed)
    # use 400 points
    points = list(numpy.random.uniform(size=(N, d)))
    values = [loglikelihood(x) for x in points]
    if verbose: print('points:', list(zip(points, values)))

    distances = []
    shrinkages = []
    previous = list(zip(points, points, values))

    pbar = progressbar.ProgressBar(widgets=[
        progressbar.Percentage(),
        progressbar.Counter('%5d'),
        progressbar.Bar(),
        progressbar.ETA()
    ])
    total_samples = 0
    for it in pbar(range(niter)):
        # remove lowest, draw a higher one
        i = numpy.argmin(values)
        k = numpy.random.randint(0, N - 1)
        if k >= i:
            k += 1
        Li = values[i]
        ui = points[i]
        xi = points[i]
        # reached numerical accuracy: all points are in the center
        if numpy.all(ui == 0.5):
            niter = it
            break
        assert numpy.isfinite(Li), Li
        assert numpy.isfinite(ui).all(), ui
        if verbose: print('calling draw_constrained with Lmin', Li, ui)

        uj, xj, Lj, n = constrainer.draw_constrained(
            Lmin=Li,
            priortransform=priortransform,
            loglikelihood=loglikelihood,
            previous=previous,
            ndim=d,
            draw_global_uniform=lambda: numpy.random.uniform(0, 1, size=d),
            startu=points[k],
            startx=points[k],
            startL=values[k],
            starti=i)
        assert numpy.isfinite(uj).all(), uj
        assert numpy.isfinite(Lj), Lj
        total_samples += n

        rsize = numpy.abs(ui - 0.5).max()
        dist = normalized_distance(d, points, rsize, uj, verbose=verbose)

        # store distance
        distances.append(dist)

        # replace point
        points[i] = uj
        values[i] = Lj
        previous.append([uj, uj, Lj])

        # store shrinkage: use points after removing least likely point
        rnewsize = numpy.abs(numpy.array(points) - 0.5).max()
        shrinkages.append(shrinkage(d, rsize, rnewsize, verbose=verbose))
    return distances, shrinkages, total_samples, niter
Example #11
0
    model = tf.keras.Sequential([
        tf.keras.layers.SimpleRNN(units=FLAGS['units'],
                                  input_shape=(FLAGS['sample_length'] - 1,
                                               len(unique_tokens))),
        tf.keras.layers.Dense(len(unique_tokens)),
        tf.keras.layers.Activation('softmax')
    ])
    optimizer = tf.keras.optimizers.Nadam()
    model.compile(optimizer=optimizer, loss='categorical_crossentropy')

t1 = dt.now()
sz = samples.shape
for i in range(0, FLAGS['epochs']):
    widgets = [
        f'Epoch {i+1} batch ',
        pb.Counter(format='%(value)d/%(max_value)d'), ' ',
        pb.Bar(marker='.', left='[', right=']'), ' ',
        pb.ETA()
    ]
    with pb.ProgressBar(widgets=widgets,
                        max_value=m.ceil(sz[0] / FLAGS['batch_size'])) as bar:
        batch_i = 0
        while batch_i < sz[0]:
            bar.update(batch_i / FLAGS['batch_size'])
            batch = u.make_batch(samples, batch_i, FLAGS['batch_size'])
            one_hot = u.one_hot_batch(batch, unique_tokens)
            loss = model.train_on_batch(one_hot.x, one_hot.y)
            batch_i = batch_i + FLAGS['batch_size']
    print(f'Epoch: {i+1}, Loss: {loss}')

t2 = dt.now()
Example #12
0
def score_docs(csv, dic_type, prob_map, score_type, out_metrics, num_docs):
    
    """Wrapper function that executes functions for preprocessing and dictionary scoring.
    dic_type specifies the dictionary with which the documents should be scored.
    Accepted values are: [emfd, mfd, mfd2]"""

    if score_type == 'wordlist':
        widgets = [
            'Processed: ', progressbar.Counter(),
            ' ', progressbar.Percentage(),
            ' ', progressbar.Bar(marker='❤'),
            ' ', progressbar.Timer(),
            ' ', progressbar.ETA(),
        ]

        with progressbar.ProgressBar(max_value=num_docs, widgets=widgets) as bar:
            moral_words = []
            for i, row in csv[0].iteritems():
                if row in emfd.keys():
                    moral_words.append(emfd[row])
                else:
                    bar.update(i)
                    continue
        

            emfd_score = {k: 0 for k in probabilites+senti}

            # Collect e-MFD data for all moral words in document
            for dic in moral_words:
                emfd_score['care_p'] += dic['care_p']
                emfd_score['fairness_p'] += dic['fairness_p']
                emfd_score['loyalty_p'] += dic['loyalty_p']
                emfd_score['authority_p'] += dic['authority_p']
                emfd_score['sanctity_p'] += dic['sanctity_p']
        
                emfd_score['care_sent'] += dic['care_sent']
                emfd_score['fairness_sent'] += dic['fairness_sent']
                emfd_score['loyalty_sent'] += dic['loyalty_sent']
                emfd_score['authority_sent'] += dic['authority_sent']
                emfd_score['sanctity_sent'] += dic['sanctity_sent']
                bar.update(i)

            emfd_score = {k: v/len(moral_words) for k, v in emfd_score.items()}
            emfd_score['cnt'] = len(moral_words)
            df = pd.DataFrame(pd.Series(emfd_score)).T
            df = df[['cnt']+probabilites+senti]
            return df

    if score_type == 'gdelt.ngrams':
        widgets = [
            'Processed: ', progressbar.Counter(),
            ' ', progressbar.Percentage(),
            ' ', progressbar.Bar(marker='❤'),
            ' ', progressbar.Timer(),
            ' ', progressbar.ETA(),
        ]

        with progressbar.ProgressBar(max_value=num_docs, widgets=widgets) as bar:
            moral_words = []
            word_frequncies = []
            for i, row in csv.iterrows():
                if row['word'] in emfd.keys():
                    moral_words.append( {'scores':emfd[row['word']], 'freq': row['freq']} )
                    word_frequncies.append(int(row['freq']))
                else:
                    bar.update(i)
                    continue
        

            emfd_score = {k: 0 for k in probabilites+senti}

            # Collect e-MFD data for all moral words in document
            for dic in moral_words:
                emfd_score['care_p'] += (dic['scores']['care_p'] * dic['freq'])
                emfd_score['fairness_p'] += (dic['scores']['fairness_p'] * dic['freq'])
                emfd_score['loyalty_p'] += (dic['scores']['loyalty_p'] * dic['freq'])
                emfd_score['authority_p'] += (dic['scores']['authority_p'] * dic['freq'])
                emfd_score['sanctity_p'] += (dic['scores']['sanctity_p'] * dic['freq'])
        
                emfd_score['care_sent'] += (dic['scores']['care_sent'] * dic['freq'])
                emfd_score['fairness_sent'] += (dic['scores']['fairness_sent'] * dic['freq'])
                emfd_score['loyalty_sent'] += (dic['scores']['loyalty_sent'] * dic['freq'])
                emfd_score['authority_sent'] += (dic['scores']['authority_sent'] * dic['freq'])
                emfd_score['sanctity_sent'] += (dic['scores']['sanctity_sent'] * dic['freq'])
                bar.update(i)

            emfd_score = {k: v/sum(word_frequncies) for k, v in emfd_score.items()}
            emfd_score['cnt'] = sum(word_frequncies)
            df = pd.DataFrame(pd.Series(emfd_score)).T
            df = df[['cnt']+probabilites+senti]
            return df

    nlp = spacy.load('en_core_web_sm', disable=['ner', 'parser'])
    nlp.add_pipe("mfd_tokenizer")
    
    if dic_type == 'emfd':
        if prob_map == 'all' and out_metrics == 'sentiment':
            nlp.add_pipe("score_emfd_all_sent", last=True)
        elif prob_map == 'all' and out_metrics == 'vice-virtue':
            nlp.add_pipe("score_emfd_all_vice_virtue", last=True)
        elif prob_map == 'single' and out_metrics == 'sentiment':
            nlp.add_pipe("score_emfd_single_sent", last=True)
        elif prob_map == 'single' and out_metrics == 'vice-virtue':
            nlp.add_pipe("score_emfd_single_vice_virtue", last=True)
    elif dic_type == 'mfd':
        nlp.add_pipe("score_mfd", last=True)
    elif dic_type == 'mfd2':
        nlp.add_pipe("score_mfd2", last=True)
    else:
        print('Dictionary type not recognized. Available values are: emfd, mfd, mfd2')
        return 

    scored_docs = []
    widgets = [
        'Processed: ', progressbar.Counter(),
        ' ', progressbar.Percentage(),
        ' ', progressbar.Bar(marker='❤'),
        ' ', progressbar.Timer(),
        ' ', progressbar.ETA(),
    ]

    
    with progressbar.ProgressBar(max_value=num_docs, widgets=widgets) as bar:
        for i, row in csv[0].iteritems():
            scored_docs.append(nlp(row))
            bar.update(i)

    df = pd.DataFrame(scored_docs)
    
    if dic_type == 'emfd':
        if prob_map == 'all' and out_metrics == 'sentiment':
            df['f_var'] = df[probabilites].var(axis=1)
            df['sent_var'] = df[senti].var(axis=1)
        elif prob_map == 'single' and out_metrics == 'sentiment':
            df['f_var'] = df[probabilites].var(axis=1)
            df['sent_var'] = df[senti].var(axis=1)
        elif prob_map == 'all' and out_metrics == 'vice-virtue':
            mfd_foundations = ['care.virtue', 'fairness.virtue', 'loyalty.virtue',
                   'authority.virtue','sanctity.virtue',
                   'care.vice','fairness.vice','loyalty.vice',
                   'authority.vice','sanctity.vice']
            df['f_var'] = df[mfd_foundations].var(axis=1)
            del df['moral']
        elif prob_map == 'single' and out_metrics == 'vice-virtue':
            mfd_foundations = ['care.virtue', 'fairness.virtue', 'loyalty.virtue',
                   'authority.virtue','sanctity.virtue',
                   'care.vice','fairness.vice','loyalty.vice',
                   'authority.vice','sanctity.vice']
            df['f_var'] = df[mfd_foundations].var(axis=1)
            
    if dic_type == 'mfd' or dic_type == 'mfd2':
        # Calculate variance
        mfd_foundations = ['care.virtue', 'fairness.virtue', 'loyalty.virtue',
                   'authority.virtue','sanctity.virtue',
                   'care.vice','fairness.vice','loyalty.vice',
                   'authority.vice','sanctity.vice']
        
        df['f_var'] = df[mfd_foundations].var(axis=1)
        
    return df
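# --- Illustrative sketch (toy numbers, not part of the eMFD code) ---
# It shows the frequency-weighted averaging used in the 'gdelt.ngrams' branch above:
# each word's foundation score is weighted by its frequency, and the sum is divided
# by the total frequency.
words = [
    {'scores': {'care_p': 0.8}, 'freq': 3},
    {'scores': {'care_p': 0.2}, 'freq': 1},
]
emfd_score = {'care_p': 0}
for dic in words:
    emfd_score['care_p'] += dic['scores']['care_p'] * dic['freq']
total_freq = sum(w['freq'] for w in words)
emfd_score = {k: v / total_freq for k, v in emfd_score.items()}
print(emfd_score)   # {'care_p': 0.65}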
def multi_nested_integrator(multi_sampler,
                            tolerance=0.01,
                            max_samples=None,
                            min_samples=0,
                            need_robust_remainder_error=True):
    sampler = multi_sampler
    logVolremaining = 0
    logwidth = log(1 - exp(-1. / sampler.nlive_points))
    weights = []  #[-1e300, 1]]

    widgets = [
        progressbar.Counter('%f'),
        progressbar.Bar(),
        progressbar.Percentage(),
        AdaptiveETA()
    ]
    pbar = progressbar.ProgressBar(widgets=widgets)

    i = 0
    ndata = multi_sampler.ndata
    running = numpy.ones(ndata, dtype=bool)
    last_logwidth = numpy.zeros(ndata)
    last_logVolremaining = numpy.zeros(ndata)
    last_remainderZ = numpy.zeros(ndata)
    last_remainderZerr = numpy.zeros(ndata)
    logZerr = numpy.zeros(ndata)
    ui, xi, Li = sampler.next()
    wi = logwidth + Li
    logZ = wi
    H = Li - logZ
    remainder_tails = [[]] * ndata
    pbar.currval = i
    pbar.maxval = sampler.nlive_points
    pbar.start()
    while True:
        i = i + 1
        logwidth = log(1 - exp(-1. / sampler.nlive_points)) + logVolremaining
        last_logwidth[running] = logwidth
        last_logVolremaining[running] = logwidth
        logVolremaining -= 1. / sampler.nlive_points

        # fill up, otherwise set weight to zero
        Lifull = numpy.zeros(ndata)
        Lifull[:] = -numpy.inf
        Lifull[running] = Li
        uifull = numpy.zeros((ndata, ui.shape[1]))
        uifull[running, :] = ui
        xifull = numpy.zeros((ndata, ui.shape[1]))
        xifull[running, :] = xi
        weights.append([
            uifull, xifull, Lifull,
            numpy.where(running, logwidth, -numpy.inf), running
        ])

        logZerr[running] = (H[running] / sampler.nlive_points)**0.5

        sys.stdout.flush()
        pbar.update(i)

        # expected number of iterations:
        i_final = -sampler.nlive_points * (-sampler.Lmax + log(
            exp(
                numpy.max(
                    [tolerance - logZerr[running], logZerr[running] / 100.],
                    axis=0) + logZ[running]) - exp(logZ[running])))
        i_final = numpy.where(
            i_final < i + 1, i + 1,
            numpy.where(i_final > i + 100000, i + 100000, i_final))
        pbar.maxval = i_final.max()

        if i > min_samples and i % 50 == 1:
            remainderZ, remainderZerr, totalZ, totalZerr, totalZerr_bootstrapped = integrate_remainder(
                sampler, logwidth, logVolremaining, logZ[running], H[running],
                sampler.Lmax)
            # tolerance
            last_remainderZ[running] = remainderZ
            last_remainderZerr[running] = remainderZerr
            terminating = totalZerr < tolerance
            widgets[0] = (
                '|%d/%d samples+%d/%d|lnZ = %.2f +- %.3f + %.3f|L=%.2f^%.2f ' % (
                    i + 1, pbar.maxval, sampler.nlive_points, sampler.ndraws,
                    logaddexp(logZ[running][0], remainderZ[0]),
                    max(logZerr[running]), max(remainderZerr), Li[0],
                    sampler.Lmax[0]))
            if terminating.any():
                print('terminating %d, namely:' % terminating.sum(),
                      list(numpy.where(terminating)[0]))
                for j, k in enumerate(numpy.where(running)[0]):
                    if terminating[j]:
                        remainder_tails[k] = [[
                            ui, xi, Li, logwidth
                        ] for ui, xi, Li in sampler.remainder(j)]
                sampler.cut_down(~terminating)
                running[running] = ~terminating
            if not running.any():
                break
            print(widgets[0])
        ui, xi, Li = sampler.next()
        wi = logwidth + Li
        logZnew = logaddexp(logZ[running], wi)
        H[running] = exp(wi - logZnew) * Li + exp(logZ[running] - logZnew) * (
            H[running] + logZ[running]) - logZnew
        logZ[running] = logZnew

    # add tail
    # not needed for integral, but for posterior samples, otherwise there
    # is a hole in the most likely parameter ranges.
    all_tails = numpy.ones(ndata, dtype=bool)
    for i in range(sampler.nlive_points):
        u, x, L, logwidth = zip(*[tail[i] for tail in remainder_tails])
        weights.append([u, x, L, logwidth, all_tails])
    logZerr = logZerr + last_remainderZerr
    logZ = logaddexp(logZ, last_remainderZ)

    return dict(logZ=logZ,
                logZerr=logZerr,
                weights=weights,
                information=H,
                niterations=i)
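# --- Tiny numerical sketch (not part of the integrator above) ---
# It shows the log-space bookkeeping used in the loop: the prior volume assigned to
# each dead point is log(1 - exp(-1/nlive)) + logVolremaining, and the evidence is
# accumulated with logaddexp to avoid underflow.
import numpy
from numpy import log, exp, logaddexp

nlive = 100
logVolremaining = 0.0
logZ = -numpy.inf
for Li in [-12.0, -11.5, -11.0]:           # toy log-likelihoods of dead points
    logwidth = log(1 - exp(-1.0 / nlive)) + logVolremaining
    logZ = logaddexp(logZ, logwidth + Li)
    logVolremaining -= 1.0 / nlive
print(logZ)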
Example #14
0
def main():
    workspace_path = os.environ.get('AE_WORKSPACE_PATH')

    if workspace_path is None:
        print('Please define a workspace path:\n')
        print('export AE_WORKSPACE_PATH=/path/to/workspace\n')
        exit(-1)

    gentle_stop = np.array((1, ), dtype=np.bool)
    gentle_stop[0] = False

    def on_ctrl_c(signal, frame):
        gentle_stop[0] = True

    signal.signal(signal.SIGINT, on_ctrl_c)

    parser = argparse.ArgumentParser()
    parser.add_argument("experiment_name")
    parser.add_argument("-d", action='store_true', default=False)
    parser.add_argument("-gen", action='store_true', default=False)
    arguments = parser.parse_args()

    full_name = arguments.experiment_name.split('/')

    experiment_name = full_name.pop()
    experiment_group = full_name.pop() if len(full_name) > 0 else ''

    debug_mode = arguments.d
    generate_data = arguments.gen

    cfg_file_path = u.get_config_file_path(workspace_path, experiment_name,
                                           experiment_group)
    log_dir = u.get_log_dir(workspace_path, experiment_name, experiment_group)
    checkpoint_file = u.get_checkpoint_basefilename(log_dir)
    ckpt_dir = u.get_checkpoint_dir(log_dir)
    train_fig_dir = u.get_train_fig_dir(log_dir)
    dataset_path = u.get_dataset_path(workspace_path)

    if not os.path.exists(cfg_file_path):
        print('Could not find config file:\n')
        print('{}\n'.format(cfg_file_path))
        exit(-1)

    if not os.path.exists(ckpt_dir):
        os.makedirs(ckpt_dir)
    if not os.path.exists(train_fig_dir):
        os.makedirs(train_fig_dir)
    if not os.path.exists(dataset_path):
        os.makedirs(dataset_path)

    args = configparser.ConfigParser()
    args.read(cfg_file_path)

    shutil.copy2(cfg_file_path, log_dir)

    with tf.variable_scope(experiment_name):
        dataset = factory.build_dataset(dataset_path, args)
        queue = factory.build_queue(dataset, args)
        encoder = factory.build_encoder(queue.x, args, is_training=True)
        decoder = factory.build_decoder(queue.y,
                                        encoder,
                                        args,
                                        is_training=True)
        ae = factory.build_ae(encoder, decoder, args)
        codebook = factory.build_codebook(encoder, dataset, args)
        train_op = factory.build_train_op(ae, args)
        saver = tf.train.Saver(save_relative_paths=True)

    num_iter = args.getint('Training',
                           'NUM_ITER') if not debug_mode else 100000
    save_interval = args.getint('Training', 'SAVE_INTERVAL')
    model_type = args.get('Dataset', 'MODEL')

    if model_type == 'dsprites':
        dataset.get_sprite_training_images(args)
    else:
        dataset.get_training_images(dataset_path, args)
        dataset.load_bg_images(dataset_path)

    if generate_data:
        print('finished generating synthetic training data for ' +
              experiment_name)
        print('exiting...')
        exit()

    widgets = [
        'Training: ',
        progressbar.Percentage(), ' ',
        progressbar.Bar(), ' ',
        progressbar.Counter(),
        ' / %s' % num_iter, ' ',
        progressbar.ETA(), ' '
    ]
    bar = progressbar.ProgressBar(maxval=num_iter, widgets=widgets)

    gpu_options = tf.GPUOptions(allow_growth=True,
                                per_process_gpu_memory_fraction=0.9)
    config = tf.ConfigProto(gpu_options=gpu_options)

    with tf.Session(config=config) as sess:

        chkpt = tf.train.get_checkpoint_state(ckpt_dir)
        if chkpt and chkpt.model_checkpoint_path:
            saver.restore(sess, chkpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

        merged_loss_summary = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(ckpt_dir, sess.graph)

        if not debug_mode:
            print('Training with %s model' % args.get('Dataset', 'MODEL'),
                  os.path.basename(args.get('Paths', 'MODEL_PATH')))
            bar.start()

        queue.start(sess)
        for i in range(ae.global_step.eval(), num_iter):
            if not debug_mode:
                sess.run(train_op)
                if i % 10 == 0:
                    loss = sess.run(merged_loss_summary)
                    summary_writer.add_summary(loss, i)

                bar.update(i)
                if (i + 1) % save_interval == 0:
                    saver.save(sess,
                               checkpoint_file,
                               global_step=ae.global_step)

                    this_x, this_y = sess.run([queue.x, queue.y])
                    reconstr_train = sess.run(decoder.x,
                                              feed_dict={queue.x: this_x})
                    train_imgs = np.hstack(
                        (u.tiles(this_x, 4,
                                 4), u.tiles(reconstr_train, 4,
                                             4), u.tiles(this_y, 4, 4)))
                    cv2.imwrite(
                        os.path.join(train_fig_dir,
                                     'training_images_%s.png' % i),
                        train_imgs * 255)
            else:

                this_x, this_y = sess.run([queue.x, queue.y])
                reconstr_train = sess.run(decoder.x,
                                          feed_dict={queue.x: this_x})
                cv2.imshow(
                    'sample batch',
                    np.hstack((u.tiles(this_x, 3,
                                       3), u.tiles(reconstr_train, 3,
                                                   3), u.tiles(this_y, 3, 3))))
                k = cv2.waitKey(0)
                if k == 27:
                    break

            if gentle_stop[0]:
                break

        queue.stop(sess)
        if not debug_mode:
            bar.finish()
        if not gentle_stop[0] and not debug_mode:
            print('To create the embedding run:\n')
            print('ae_embed {}\n'.format(full_name))
Example #15
0
    # optimizer_discriminator = Adam(params=net.discriminator.parameters(),lr = lr,betas=(0.9,0.999))
    optimizer_discriminator = RMSprop(params=net.discriminator.parameters(),
                                      lr=lr,
                                      alpha=0.9,
                                      eps=1e-8,
                                      weight_decay=0,
                                      momentum=0,
                                      centered=False)
    lr_discriminator = ExponentialLR(optimizer_discriminator, gamma=decay_lr)
    # lr_discriminator = MultiStepLR(optimizer_discriminator,milestones=[2],gamma=1)

    batch_number = len(dataloader)
    step_index = 0
    widgets = [
        'Batch: ',
        progressbar.Counter(), '/',
        progressbar.FormatCustomText('%(total)s', {"total": batch_number}),
        ' ',
        progressbar.Bar(marker="-", left='[', right=']'), ' ',
        progressbar.ETA(), ' ',
        progressbar.DynamicMessage('loss_nle'), ' ',
        progressbar.DynamicMessage('loss_encoder'), ' ',
        progressbar.DynamicMessage('loss_decoder'), ' ',
        progressbar.DynamicMessage('loss_discriminator'), ' ',
        progressbar.DynamicMessage('loss_mse_layer'), ' ',
        progressbar.DynamicMessage('loss_kld'), ' ',
        progressbar.DynamicMessage('loss_aux_classifier'), ' ',
        progressbar.DynamicMessage("epoch")
    ]

    # for each epoch
Example #16
0
        skiprows=1)
    antennas = stand_cable_delays[:, 0].astype(int).astype(str)
    cable_delays = stand_cable_delays[:, 1]

    for it in range(max_n_timestamps):
        timestamp = timestamps[it]
        antenna_level_update_info = {}
        antenna_level_update_info['antenna_array'] = {}
        antenna_level_update_info['antenna_array']['timestamp'] = timestamp
        antenna_level_update_info['antennas'] = []

        print('Consolidating Antenna updates...')
        progress = PGB.ProgressBar(widgets=[
            PGB.Percentage(),
            PGB.Bar(marker='-', left=' |', right='| '),
            PGB.Counter(), '/{0:0d} Antennas '.format(n_antennas),
            PGB.ETA()
        ],
                                   maxval=n_antennas).start()
        antnum = 0
        for ia, label in enumerate(antid):
            adict = {}
            adict['label'] = label
            adict['action'] = 'modify'
            adict['timestamp'] = timestamp
            adict['t'] = NP.arange(nts) * dt
            # adict['gridfunc_freq'] = 'scale'
            # adict['gridmethod'] = 'NN'
            # adict['distNN'] = 0.5 * FCNST.c / f0
            # adict['tol'] = 1.0e-6
            # adict['maxmatch'] = 1
Example #17
0
    model_f1.cuda()
    weight = weight.cuda()

model_g.train()
model_f1.train()

criterion_mask = torch.nn.CrossEntropyLoss(weight=weight)
criterion_yaw = get_yaw_loss(args.yaw_loss)
if torch.cuda.is_available():
    criterion_mask = criterion_mask.cuda()
    criterion_yaw = criterion_yaw.cuda()

for epoch in range(start_epoch, args.epochs):
    widgets = [
        'Epoch %d/%d,' % (epoch, args.epochs), ' ',
        progressbar.Counter('batch %(value)d/%(max_value)d')
    ]
    bar = progressbar.ProgressBar(widgets=widgets,
                                  max_value=len(train_loader),
                                  redirect_stdout=True)

    for ibatch, batch in bar(enumerate(train_loader)):
        log_counter += 1
        istep += args.batch_size

        imgs, gt_masks = batch['image'], batch['mask']
        imgs, gt_masks = Variable(imgs), Variable(gt_masks)
        if torch.cuda.is_available():
            imgs, gt_masks = imgs.cuda(), gt_masks.cuda()

        optimizer_f.zero_grad()
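
# A minimal sketch of the pattern above, assuming the progressbar2 package:
# calling the ProgressBar instance on an iterable advances the bar
# automatically, and redirect_stdout=True keeps print() output from breaking
# the bar rendering. The data list is a stand-in for train_loader.
import progressbar

data = list(range(50))
widgets = ['Epoch 0/1,', ' ',
           progressbar.Counter('batch %(value)d/%(max_value)d')]
bar = progressbar.ProgressBar(widgets=widgets,
                              max_value=len(data),
                              redirect_stdout=True)
for ibatch, batch in bar(enumerate(data)):
    print('processing batch', ibatch)  # printed lines appear above the bar
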
Example #18
0
def main():
    seeding(seed=SEED)
    # number of parallel agents
    parallel_envs = 1
    # number of agents per environment
    num_agents = 5
    # number of training episodes.
    # change this to a higher number to experiment, say 30000.
    number_of_episodes = 60000
    episode_length = 35
    # how many episodes to save policy and gif
    save_interval = 1000
    t = 0
    scenario_name = "simple_spread_ivan"

    # amplitude of OU noise
    # this slowly decreases to 0
    noise = 0.5  # was 2, try 0.5, 0.2
    noise_reduction = 0.9999  # 0.999
    #### DECAY
    initial_noise = 0.1
    decay = 0.01

    # how many episodes before update
    # episode_per_update = UPDATE_EVERY * parallel_envs
    common_folder = time.strftime("/%m%d%y_%H%M%S")
    log_path = os.getcwd() + common_folder + "/log"
    model_dir = os.getcwd() + common_folder + "/model_dir"

    os.makedirs(model_dir, exist_ok=True)

    # initialize environment
    # torch.set_num_threads(parallel_envs)
    env = envs.make_parallel_env(parallel_envs, seed=3, benchmark=BENCHMARK)
    # env = envs.make_env("simple_spread_ivan")

    # initialize replay buffer
    buffer = ReplayBuffer(int(BUFFER_SIZE))

    # initialize policy and critic
    maddpg = MADDPG(num_agents=num_agents,
                    discount_factor=GAMMA,
                    tau=TAU,
                    lr_actor=LR_ACTOR,
                    lr_critic=LR_CRITIC,
                    weight_decay=WEIGHT_DECAY)
    logger = SummaryWriter(log_dir=log_path)

    agents_reward = []
    for n in range(num_agents):
        agents_reward.append([])
    # agent0_reward = []
    # agent1_reward = []
    # agent2_reward = []

    agent_info = [[[]]]  # placeholder for benchmarking info

    # training loop
    # show progressbar
    import progressbar as pb
    widget = [
        '\repisode: ',
        pb.Counter(), '/',
        str(number_of_episodes), ' ',
        pb.Percentage(), ' ',
        pb.ETA(), ' ',
        pb.Bar(marker=pb.RotatingMarker()), ' '
    ]
    timer = pb.ProgressBar(widgets=widget, maxval=number_of_episodes).start()

    print('Starting iterations...')
    for episode in range(0, number_of_episodes, parallel_envs):

        timer.update(episode)

        reward_this_episode = np.zeros((parallel_envs, num_agents))

        all_obs = env.reset()  #

        # flip the first two indices
        # ADD FOR WITHOUT PARALLEL ENV
        # all_obs = np.expand_dims(all_obs, axis=0)

        obs_roll = np.rollaxis(all_obs, 1)
        obs = transpose_list(obs_roll)

        # save info or not
        save_info = ((episode) % save_interval < parallel_envs
                     or episode == number_of_episodes - parallel_envs)
        frames = []
        tmax = 0

        # if save_info:
        # frames.append(env.render('rgb_array'))

        for episode_t in range(episode_length):

            # get actions
            # explore = only explore for a certain number of episodes
            # action input needs to be transposed
            actions = maddpg.act(transpose_to_tensor(obs), noise=noise)

            noise = max(initial_noise * decay**(episode_t / 20000), 0.001)
            # noise = max(noise*noise_reduction, 0.001)

            actions_array = torch.stack(actions).detach().numpy()

            # transpose the list of list
            # flip the first two indices
            # input to step requires the first index to correspond to number of parallel agents
            actions_for_env = np.rollaxis(actions_array, 1)

            # environment step
            # step forward one frame
            # next_obs, next_obs_full, rewards, dones, info = env.step(actions_for_env)

            # ADD FOR WITHOUT PARALLEL ENV
            # next_obs, rewards, dones, info = env.step(actions_for_env)
            next_obs, rewards, dones, info = env.step(actions_for_env)

            # rewards_sum += np.mean(rewards)

            # collect experience
            transition = (obs, actions_for_env, rewards, next_obs, dones)
            buffer.push(transition)

            reward_this_episode += rewards

            # obs, obs_full = next_obs, next_obs_full
            obs = next_obs

            # increment global step counter
            t += parallel_envs

            # save gif frame
            if save_info:
                # frames.append(env.render('rgb_array'))
                tmax += 1

            # for benchmarking learned policies
            if BENCHMARK:
                for i, inf in enumerate(info):
                    agent_info[-1][i].append(inf['n'])

        # update once after every episode_per_update
        # if len(buffer) > BATCH_SIZE and episode % episode_per_update < parallel_envs:
        if len(buffer) > BATCH_SIZE and episode % UPDATE_EVERY < parallel_envs:
            for _ in range(UPDATE_TIMES):
                for a_i in range(num_agents):
                    samples = buffer.sample(BATCH_SIZE)
                    maddpg.update(samples, a_i, logger)
                # soft update the target networks towards the actual networks
                maddpg.update_targets()

        for i in range(parallel_envs):
            for n in range(num_agents):
                agents_reward[n].append(reward_this_episode[i, n])
            # agent0_reward.append(reward_this_episode[i,0])
            # agent1_reward.append(reward_this_episode[i,1])
            # agent2_reward.append(reward_this_episode[i,2])

        if episode % 100 == 0 or episode == number_of_episodes - 1:
            # avg_rewards = [np.mean(agent0_reward), np.mean(agent1_reward), np.mean(agent2_reward)]
            avg_rewards = []
            for n in range(num_agents):
                avg_rewards.append(np.mean(agents_reward[n]))
                # agent0_reward = []
            # agent1_reward = []
            # agent2_reward = []
            for a_i, avg_rew in enumerate(avg_rewards):
                logger.add_scalar('agent%i/mean_episode_rewards' % a_i,
                                  avg_rew, episode)

        # saving model
        save_dict_list = []
        if save_info:
            print('agent_info benchmark=', agent_info)
            for i in range(5):
                save_dict = {
                    'actor_params':
                    maddpg.maddpg_agent[i].actor.state_dict(),
                    'actor_optim_params':
                    maddpg.maddpg_agent[i].actor_optimizer.state_dict(),
                    'critic_params':
                    maddpg.maddpg_agent[i].critic.state_dict(),
                    'critic_optim_params':
                    maddpg.maddpg_agent[i].critic_optimizer.state_dict()
                }
                save_dict_list.append(save_dict)

                torch.save(
                    save_dict_list,
                    os.path.join(model_dir, 'episode-{}.pt'.format(episode)))

            # save gif files
            # imageio.mimsave(os.path.join(model_dir, 'episode-{}.gif'.format(episode)),
            #                 frames, duration=.04)

    env.close()
    logger.close()
    timer.finish()
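
# A minimal sketch of the episode-level progress bar used above, assuming the
# progressbar2 package, where maxval is accepted as a legacy alias for
# max_value. The episode loop advances in steps of parallel_envs, so update()
# receives the current episode index rather than a running counter.
# number_of_episodes and parallel_envs are placeholder values.
import progressbar as pb

number_of_episodes = 1000
parallel_envs = 4
widget = ['episode: ', pb.Counter(), '/', str(number_of_episodes), ' ',
          pb.Percentage(), ' ', pb.ETA(), ' ',
          pb.Bar(marker=pb.RotatingMarker()), ' ']
timer = pb.ProgressBar(widgets=widget, maxval=number_of_episodes).start()
for episode in range(0, number_of_episodes, parallel_envs):
    timer.update(episode)  # training work for this batch of episodes goes here
timer.finish()
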
Example #19
0
    qbox.upperRight = (section['right'] * width / shape[0],
                       height - section['top'] * height / shape[1])
    q.mediaBox = qbox
    return q


def join(parts, outfile):
    output = PdfFileWriter()
    for p in parts:
        output.addPage(p)
    with open(outfile, 'wb') as fh:  # binary mode; Python 3 has no file() builtin
        output.write(fh)


pbar = progressbar.ProgressBar(widgets=[
    progressbar.Percentage(),
    progressbar.Counter('%5d'),
    progressbar.Bar(),
    progressbar.ETA()
],
                               maxval=len(pages)).start()

for i, (segments, page) in enumerate(zip(pages_sub_segments, pages)):
    for seg_info in segments:
        seg = seg_info['segment']
        is_col = seg_info['iscolumn']
        j += 1

        width = seg['right'] - seg['left']
        height = seg['bottom'] - seg['top']

        rotate = False
Example #20
0
def main():
    seeding()
    # number of parallel agents
    parallel_envs = 4
    # number of training episodes.
    # change this to a higher number to experiment, say 30000.
    number_of_episodes = 1000
    episode_length = 80
    batchsize = 1000
    # how many episodes to save policy and gif
    save_interval = 1000
    t = 0

    # amplitude of OU noise
    # this slowly decreases to 0
    noise = 2
    noise_reduction = 0.9999

    # how many episodes before update
    episode_per_update = 2 * parallel_envs

    log_path = os.getcwd() + "/log"
    model_dir = os.getcwd() + "/model_dir"

    os.makedirs(model_dir, exist_ok=True)

    torch.set_num_threads(parallel_envs)
    env = envs.make_parallel_env(parallel_envs)

    # keep 5000 episodes worth of replay
    buffer = ReplayBuffer(int(5000 * episode_length))

    # initialize policy and critic
    maddpg = MADDPG()
    logger = SummaryWriter(log_dir=log_path)
    agent0_reward = []
    agent1_reward = []
    agent2_reward = []

    # training loop
    # show progressbar
    import progressbar as pb
    widget = ['episode: ', pb.Counter(), '/', str(number_of_episodes), ' ',
              pb.Percentage(), ' ', pb.ETA(), ' ',
              pb.Bar(marker=pb.RotatingMarker()), ' ']

    timer = pb.ProgressBar(widgets=widget, maxval=number_of_episodes).start()

    # use keep_awake to keep workspace from disconnecting
    # for episode in keep_awake(range(0, number_of_episodes, parallel_envs))
    for episode in range(0, number_of_episodes, parallel_envs):

        timer.update(episode)

        reward_this_episode = np.zeros((parallel_envs, 3))
        all_obs = env.reset()
        obs, obs_full = transpose_list(all_obs)

        # for calculating rewards for this particular episode - addition of
        # all time steps

        # save info or not
        save_info = ((episode) % save_interval < parallel_envs or episode == number_of_episodes - parallel_envs)
        frames = []
        tmax = 0

        if save_info:
            frames.append(env.render('rgb_array'))

        for episode_t in range(episode_length):

            t += parallel_envs

            # explore = only explore for a certain number of episodes
            # action input needs to be transposed
            actions = maddpg.act(transpose_to_tensor(obs), noise=noise)
            noise *= noise_reduction

            actions_array = torch.stack(actions).detach().numpy()

            '''
            transpose the list of list
            flip the first two indices
            input to step requires the first index to correspond to number of
            parallel agents
            '''
            actions_for_env = np.rollaxis(actions_array, 1)

            # step forward one frame
            next_obs, next_obs_full, rewards, dones, info = env.step(
                actions_for_env)

            # add data to buffer
            transition = (obs, obs_full, actions_for_env,
                          rewards, next_obs, next_obs_full, dones)

            buffer.push(transition)

            reward_this_episode += rewards

            obs, obs_full = next_obs, next_obs_full

            # save gif frame
            if save_info:
                frames.append(env.render('rgb_array'))
                tmax += 1

        # update once after every episode_per_update
        if len(buffer) > batchsize and episode % episode_per_update < parallel_envs:
            # loop for each agent
            for a_i in range(3):
                samples = buffer.sample(batchsize)
                maddpg.update(samples, a_i, logger)
            # soft update the target network towards the actual networks
            maddpg.update_targets()

        for i in range(parallel_envs):
            agent0_reward.append(reward_this_episode[i, 0])
            agent1_reward.append(reward_this_episode[i, 1])
            agent2_reward.append(reward_this_episode[i, 2])

        if episode % 100 == 0 or episode == number_of_episodes - 1:
            avg_rewards = [np.mean(agent0_reward), np.mean(
                agent1_reward), np.mean(agent2_reward)]
            agent0_reward = []
            agent1_reward = []
            agent2_reward = []
            for a_i, avg_rew in enumerate(avg_rewards):
                logger.add_scalar('agent%i/mean_episode_rewards' %
                                  a_i, avg_rew, episode)

        # saving model
        save_dict_list = []
        if save_info:
            for i in range(3):
                save_dict = {'actor_params': maddpg.maddpg_agent[i].actor.state_dict(),
                             'actor_optim_params': maddpg.maddpg_agent[i].actor_optimizer.state_dict(),
                             'critic_params': maddpg.maddpg_agent[i].critic.state_dict(),
                             'critic_optim_params': maddpg.maddpg_agent[i].critic_optimizer.state_dict()}
                save_dict_list.append(save_dict)

                torch.save(save_dict_list,
                           os.path.join(model_dir, 'episode-{}.pt'.format(episode)))

            # save gif files
            imageio.mimsave(os.path.join(model_dir, 'episode-{}.gif'.format(episode)),
                            frames, duration=.04)

    env.close()
    logger.close()
    timer.finish()
Example #21
0
    def load_images_from_fs(self, properties, container):

        if "path" in properties["input"]:
            imagePaths = list(list_images(properties["input"]["path"]))
            labels = []
        else:
            imagePaths = container[properties["input"]["input_data"]]
            labels = container[properties["input"]["input_labels"]]

        if "pre_processing" in properties:
            pre_processing = properties["pre_processing"]

            for p in pre_processing:
                if p["type"] == "extract_mean_rgb":
                    self.extract_mean_rgb(p["save_to"], imagePaths)

        if "pipeline" in properties:
            pipeline = properties["pipeline"]

            for image_processor in pipeline:
                class_loader = FrameworkUtility.get_instance(
                    image_processor["processor"])
                img_process = class_loader(image_processor["properties"])
                self.image_processors.append(img_process)

        data = []
        widgets = [
            '[ImageLoader] ProcessingImages - ',
            progressbar.Bar('#', '[', ']'), ' [',
            progressbar.Percentage(), '] ', '[',
            progressbar.Counter(format='%(value)02d/%(max_value)d'), '] '
        ]

        bar = progressbar.ProgressBar(maxval=len(imagePaths), widgets=widgets)
        bar.start()

        if "hdf5_file" in properties["output"]:
            writer = HDF5DatasetWriter(
                (len(imagePaths),
                 properties["output"]["dim"]["features"]["width"],
                 properties["output"]["dim"]["features"]["height"],
                 properties["output"]["dim"]["features"]["depth"]),
                (labels.shape[0], properties["output"]["dim"]["target"]),
                properties["output"]["hdf5_file"])

        for (i, imagePath) in enumerate(imagePaths):
            image = cv2.imread(imagePath)

            if "path" in properties["input"]:
                label = imagePath.split(os.path.sep)[-2]
            else:
                label = labels[i]

            image = self.processing_pipeline(image)

            if "hdf5_file" in properties["output"]:
                writer.add([image], [label])
            else:
                data.append(image)
                labels.append(label)
            bar.update(i + 1)

        if "path" in properties["input"]:
            writer.close()

        bar.finish()

        return np.array(data), np.array(labels)
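
# A minimal sketch of the widget layout used in load_images_from_fs above,
# assuming the progressbar2 package; the items list is a placeholder for
# imagePaths, and the loop body stands in for the per-image processing.
import progressbar

items = ['a.jpg', 'b.jpg', 'c.jpg']
widgets = [
    '[ImageLoader] ProcessingImages - ',
    progressbar.Bar('#', '[', ']'), ' [',
    progressbar.Percentage(), '] ', '[',
    progressbar.Counter(format='%(value)02d/%(max_value)d'), '] '
]
bar = progressbar.ProgressBar(maxval=len(items), widgets=widgets)
bar.start()
for i, item in enumerate(items):
    bar.update(i + 1)  # imread, preprocessing and writing would go here
bar.finish()
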
Example #22
0
def merge_all(dir_path: str) -> str:
    """merge all csv files in the dir_path into a single csv file
	Args:
		dir_path(str): absolute path to the source directory
	Returns:
		output_file_name(str), list of rows
	"""

    # get list of all csv files in the directory
    list_of_csv(dir_path)

    print(f'There are {len(files)} csv files in total!')

    print(f'Reading csv files({len(files)}) ...')

    pbar = progressbar.ProgressBar(
        maxval=len(files),
        widgets=[progressbar.Bar('=', '[', ']'), ' ',
                 progressbar.Percentage(), ' | ',
                 progressbar.Counter(), '/', str(len(files))])
    pbar.start()

    for index, file in enumerate(files):
        df = pd.read_csv(file)
        if df.shape[0] == 0:  # skip over empty files
            continue
        df = df.replace(np.nan, '', regex=True)
        data.append(df)
        # print(df)
        pbar.update(index + 1)

    df_data = pd.concat(data)
    df_data['ts'] = pd.to_datetime(df_data['ts'])
    df_data.sort_values(by='ts', inplace=True)

    # get first & last reading date
    start_date = df_data.iloc[0]['ts']
    end_date = df_data.iloc[len(df_data.index) - 1]['ts']
    timeline = pd.date_range(start_date, end_date, freq='15T')

    print(f'Processing data...')

    # get total count of sensors
    sensor_count = len(files)

    bar = progressbar.ProgressBar(
        maxval=len(data),
        widgets=[progressbar.Bar('=', '[', ']'), ' ',
                 progressbar.Percentage(), ' | ',
                 progressbar.Counter(), '/', str(len(data))])
    bar.start()

    filtered_data = []

    # create a new dataframe of all timestamps included(deduplicated)
    result = timeline.to_frame(index=False, name='ts')

    # merge every csv into timeframe dataframe
    for index, item in enumerate(data):
        try:
            val_column = item.iloc[0][0]
        except IndexError:
            continue

        item['ts'] = pd.to_datetime(item['ts'])
        item = item.rename(columns={'val': val_column}, copy=False)
        item = pd.merge(result,
                        item[['ts', val_column]],
                        left_on='ts',
                        right_on='ts',
                        how='left',
                        copy=False)
        item.set_index(['ts'], inplace=True)
        filtered_data.append(item)
        bar.update(index + 1)

    # merging dataframes into a single one
    print(f'Combining data into a single dataframe...')
    result = pd.concat(filtered_data, axis=1, join='outer')
    # print(result)

    # make up file name
    prefix = os.path.basename(os.path.normpath(dir_path)).replace("_", "-")
    output_file_name = f"{prefix}_{start_date.strftime('%m-%d-%Y')}_{end_date.strftime('%m-%d-%Y')}_{sensor_count}.csv"
    current_path = os.path.abspath(os.path.dirname(sys.argv[0]))

    # create csv from result dataframe
    print(f'Creating csv file...')
    result.to_csv(os.path.join(current_path, output_file_name),
                  index=True,
                  header=True)

    print(f'Done!')
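
# A minimal sketch of the progress reporting in merge_all above, assuming the
# progressbar2 package; file_list is a placeholder for the module-level
# `files`/`data` collections that the snippet reads from elsewhere.
import progressbar

file_list = ['a.csv', 'b.csv', 'c.csv']
pbar = progressbar.ProgressBar(
    maxval=len(file_list),
    widgets=[progressbar.Bar('=', '[', ']'), ' ',
             progressbar.Percentage(), ' | ',
             progressbar.Counter(), '/', str(len(file_list))])
pbar.start()
for index, name in enumerate(file_list):
    pbar.update(index + 1)  # pd.read_csv and per-file filtering would go here
pbar.finish()
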
Example #23
0
    def update_embedding(self,
                         session,
                         batch_size,
                         model_path,
                         loaded_emb=None,
                         loaded_obj_bbs=None):

        # model_name = os.path.basename(model_path).split('.')[0]
        model_name = self._get_codebook_name(model_path)

        self._dataset._kw['model_path'] = list([str(model_path)])
        if 'cad' in model_path:
            self._dataset._kw['model'] = 'cad'
        if 'reconst' in model_path:
            self._dataset._kw['model'] = 'reconst'

        if loaded_emb is None:
            embedding_size = self._dataset.embedding_size
            J = self._encoder.latent_space_size
            embedding_z = np.empty((embedding_size, J))
            obj_bbs = np.empty((embedding_size, 4))
            widgets = [
                'Creating embedding: ',
                progressbar.Percentage(), ' ',
                progressbar.Bar(), ' ',
                progressbar.Counter(),
                ' / %s' % embedding_size, ' ',
                progressbar.ETA(), ' '
            ]
            bar = progressbar.ProgressBar(maxval=embedding_size,
                                          widgets=widgets)
            bar.start()
            for a, e in u.batch_iteration_indices(embedding_size, batch_size):

                batch, obj_bbs_batch = self._dataset.render_embedding_image_batch(
                    a, e)
                # import cv2
                # cv2.imshow('',u.tiles(batch,10,10))
                # cv2.waitKey(0)
                embedding_z[a:e] = session.run(
                    self._encoder.z, feed_dict={self._encoder.x: batch})

                if self.embed_bb:
                    obj_bbs[a:e] = obj_bbs_batch

                bar.update(e)
            bar.finish()
            # embedding_z = embedding_z.T
            normalized_embedding = embedding_z / np.linalg.norm(
                embedding_z, axis=1, keepdims=True)
        else:
            normalized_embedding = loaded_emb
            obj_bbs = loaded_obj_bbs

        session.run(self.embedding_assign_op[model_name],
                    {self.embedding: normalized_embedding})

        if self.embed_bb:
            session.run(self.embed_obj_bbs_assign_op[model_name],
                        {self.embed_obj_bbs: obj_bbs})
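
# A minimal sketch of the batch-wise embedding progress pattern above,
# assuming the progressbar2 package; the index loop stands in for
# u.batch_iteration_indices(embedding_size, batch_size), and the bar is
# advanced with the end index of each batch rather than a loop counter.
import progressbar

embedding_size, batch_size = 1000, 64
widgets = ['Creating embedding: ', progressbar.Percentage(), ' ',
           progressbar.Bar(), ' ', progressbar.Counter(),
           ' / %s' % embedding_size, ' ', progressbar.ETA(), ' ']
bar = progressbar.ProgressBar(maxval=embedding_size, widgets=widgets)
bar.start()
for a in range(0, embedding_size, batch_size):
    e = min(a + batch_size, embedding_size)
    bar.update(e)  # render and encode the batch covering indices [a, e) here
bar.finish()
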