Esempio n. 1
0
def get_commits(pr_number, owner='gisce', repository='erp'):
    """Fetch every commit of a pull request from the GitHub REST API.

    The first request asks for 100 commits per page; any further pages are
    fetched by following the ``rel="next"`` URL advertised in the ``Link``
    header of each response until no such link is present.

    :param pr_number: number of the pull request
    :param owner: owner used to build the ``owner/repository`` name
    :param repository: repository used to build the ``owner/repository`` name
    :return: the decoded JSON commit entries of all pages, concatenated
    """
    # Pagination documentation: https://developer.github.com/v3/#pagination
    def parse_github_links_header(links_header):
        """Map each ``rel`` name of a ``Link`` header to its URL."""
        ret_links = {}
        for link in links_header.split(','):
            link_url, link_ref = link.split(';')
            # '<https://...>' -> 'https://...'
            link_url = link_url.strip()[1:-1]
            # ' rel="next"' -> 'next'
            link_ref = link_ref.split('=')[-1].strip()[1:-1]
            ret_links[link_ref] = link_url
        return ret_links

    logger.info('Getting commits from GitHub')
    headers = {'Authorization': 'token %s' % github_config()['token']}
    repo = github_config(
        repository='{}/{}'.format(owner, repository))['repository']
    url = "https://api.github.com/repos/%s/pulls/%s/commits?per_page=100" \
          % (repo, pr_number)
    r = requests.get(url, headers=headers)
    commits = json.loads(r.text)
    url_page = 1
    # The Link header must be re-read from every response: the 'next' URL of
    # the first page always points at page 2, so reusing it would fetch
    # page 2 repeatedly and never reach the later pages.
    while 'link' in r.headers:
        links = parse_github_links_header(r.headers['link'])
        if 'next' not in links:
            # The last page advertises no 'next' relation.
            break
        url_page += 1
        tqdm.write(colors.yellow(
            '    - Getting extra commits page {}'.format(url_page)))
        r = requests.get(links['next'], headers=headers)
        commits += json.loads(r.text)
    return commits
Esempio n. 2
0
def check_it_exists(src='/home/erp/src', repository='erp', sudo_user='******'):
    """Abort when ``ls`` on ``<src>/<repository>`` exits with a non-zero code.

    The command is run through ``sudo`` as ``sudo_user`` with all output
    hidden and ``warn_only`` enabled, so the failure is handled here.
    """
    repository_path = "{}/{}".format(src, repository)
    with settings(hide('everything'), sudo_user=sudo_user, warn_only=True):
        listing = sudo("ls {}".format(repository_path))
        if not listing.return_code:
            return
        message = "The repository does not exist or cannot be found"
        tqdm.write(colors.red(message))
        abort(message)
Esempio n. 3
0
 def catch_result(self, result):
     """Report the patches applied by a command and handle its failure.

     Every output line starting with ``Applying: `` is echoed in green and
     advances ``self.pbar`` by one step. When ``result.failed`` is set, the
     failure is handled either automatically (``self.auto_exit``) or by
     prompting for a manual resolution.

     :param result: output of the executed command; used both as text and
         through its ``failed`` attribute
         (presumably a Fabric result object -- TODO confirm)
     :raises GitHubException: when git has no user identity configured, or
         when the command failed and ``self.auto_exit`` is set
     """
     for line in result.split('\n'):
         # re.match anchors at the start of the line
         if re.match('Applying: ', line):
             tqdm.write(colors.green(line))
             self.pbar.update()
     if result.failed:
         # git prints this hint when no user identity is configured
         if "git config --global user.email" in result:
             logger.error(
                 "Need to configure git for this user\n"
             )
             raise GitHubException(result)
         try:
             # Raised on purpose and caught right below: every other failure
             # is handled by the except/finally pair.
             raise WiggleException
         except WiggleException:
             if self.auto_exit:
                 # Unattended mode: abort the `git am` session and give up
                 sudo("git am --abort")
                 logger.error('Aborting deploy and go back')
                 raise GitHubException
             # Interactive mode: wait for the operator before continuing
             prompt("Manual resolve...")
         finally:
             # Runs after the prompt (the auto_exit branch is excluded by the
             # condition): staged files lead to resolve(), otherwise skip().
             if not self.auto_exit:
                 to_commit = sudo(
                     "git diff --cached --name-only --no-color", pty=False
                 )
                 if to_commit:
                     self.resolve()
                 else:
                     self.skip()
Esempio n. 4
0
def tprint(string):
    """Print ``string`` via ``tqdm`` so that it doesn't interfere with a progress bar.

    Falls back to the built-in ``print`` when ``tqdm.write`` raises. Only
    ``Exception`` subclasses trigger the fallback, so ``KeyboardInterrupt``
    and ``SystemExit`` propagate instead of being swallowed.

    :param string: text to print
    """
    try:
        tqdm.write(string)
    except Exception:
        print(string)
Esempio n. 5
0
 def _print_epoch_means(self):
   """Write the mean training / validation accuracy of a recent window.

   The window is the slice ``[-801:-1]`` of each history, i.e. up to 800
   entries ending just before the most recent one.
   """
   val_window = np.array(self.validation_accuracies)[-801:-1]
   train_window = np.array(self.train_accuracies)[-801:-1]
   summary = 'training => %.5f / val => %.5f' % (
       np.mean(train_window), np.mean(val_window))
   tqdm.write(summary)
Esempio n. 6
0
def tpv2tan_hdr(img, ota):
    """Rewrite the header of a reprojected OTA image from TPV to TAN.

    ``CTYPE1`` and ``CTYPE2`` are set to ``RA---TAN`` and ``DEC--TAN`` with
    IRAF ``hedit``, then every ``PV*`` keyword is deleted.
    """
    image = odi.reprojpath+'reproj_'+ota+'.'+img.stem()
    tqdm.write('TPV -> TAN in {:s}'.format(image))

    # change the CTYPENs to be TANs if they aren't already
    for keyword, projection in (('CTYPE1', 'RA---TAN'),
                                ('CTYPE2', 'DEC--TAN')):
        edit_params = (
            ('images', image),
            ('fields', keyword),
            ('value', projection),
            ('add', 'yes'),
            ('addonly', 'no'),
            ('verify', 'no'),
            ('update', 'yes'),
        )
        for param, value in edit_params:
            iraf.imutil.hedit.setParam(param, value)
        iraf.imutil.hedit(show='no', mode='h')

    # delete any PV keywords
    # leaving them in will give you trouble with the img wcs
    iraf.unlearn(iraf.imutil.hedit)
    delete_params = (
        ('images', image),
        ('fields', 'PV*'),
        ('delete', 'yes'),
        ('verify', 'no'),
        ('update', 'yes'),
    )
    for param, value in delete_params:
        iraf.imutil.hedit.setParam(param, value)
    iraf.imutil.hedit(show='no', mode='h')
Esempio n. 7
0
def resync_invoiceitems(apps, schema_editor):
    """
    Since invoiceitem IDs were not previously stored (the ``stripe_id`` field held the id of the linked subscription),
    a direct migration will leave us with a bunch of orphaned objects. It was decided
    [here](https://github.com/kavdev/dj-stripe/issues/162) that a purge and re-sync would be the best option for
    subscriptions. That's being extended to InvoiceItems. No data that is currently available on stripe will be
    deleted. Anything stored locally will be purged.

    Nothing happens when there are no local invoiceitems. Otherwise every local object is printed as JSON,
    deleted, and the invoiceitems returned by ``InvoiceItem.api_list()`` are synced back one by one.
    """

    # This is okay, since we're only doing a forward migration.
    from djstripe.models import InvoiceItem

    from djstripe.context_managers import stripe_temporary_api_version

    with stripe_temporary_api_version("2016-03-07"):
        if InvoiceItem.objects.count():
            # Adjacent string literals are used instead of backslash continuations inside the literal,
            # which embedded the source indentation in the printed message.
            print("Purging invoiceitems. Don't worry, all invoiceitems will be re-synced from stripe. "
                  "Just in case you didn't get the memo, we'll print out a json representation of each "
                  "object for your records:")
            print(serializers.serialize("json", InvoiceItem.objects.all()))
            InvoiceItem.objects.all().delete()

            print("Re-syncing invoiceitems. This may take a while.")

            for stripe_invoiceitem in tqdm(iterable=InvoiceItem.api_list(), desc="Sync", unit=" invoiceitems"):
                invoiceitem = InvoiceItem.sync_from_stripe_data(stripe_invoiceitem)

                if not invoiceitem.customer:
                    tqdm.write("The customer for this invoiceitem ({invoiceitem_id}) does not exist "
                               "locally (so we won't sync the invoiceitem). You'll want to figure out "
                               "how that happened.".format(invoiceitem_id=stripe_invoiceitem['id']))

            print("InvoiceItem re-sync complete.")
Esempio n. 8
0
def resync_subscriptions(apps, schema_editor):
    """
    Since subscription IDs were not previously stored, a direct migration will leave us
    with a bunch of orphaned objects. It was decided [here](https://github.com/kavdev/dj-stripe/issues/162)
    that a purge and re-sync would be the best option. No data that is currently available on stripe will
    be deleted. Anything stored locally will be purged.

    Nothing happens when there are no local subscriptions. Otherwise every local object is printed as JSON,
    deleted, and the subscriptions returned by ``Subscription.api_list()`` are synced back one by one.
    """

    # This is okay, since we're only doing a forward migration.
    from djstripe.models import Subscription

    from djstripe.context_managers import stripe_temporary_api_version

    with stripe_temporary_api_version("2016-03-07"):
        if Subscription.objects.count():
            # Adjacent string literals are used instead of backslash continuations inside the literal,
            # which embedded the source indentation in the printed message.
            print("Purging subscriptions. Don't worry, all active subscriptions will be re-synced from "
                  "stripe. Just in case you didn't get the memo, we'll print out a json representation "
                  "of each object for your records:")
            print(serializers.serialize("json", Subscription.objects.all()))
            Subscription.objects.all().delete()

            print("Re-syncing subscriptions. This may take a while.")

            for stripe_subscription in tqdm(iterable=Subscription.api_list(), desc="Sync", unit=" subscriptions"):
                subscription = Subscription.sync_from_stripe_data(stripe_subscription)

                if not subscription.customer:
                    tqdm.write("The customer for this subscription ({subscription_id}) does not exist "
                               "locally (so we won't sync the subscription). You'll want to figure out "
                               "how that happened.".format(subscription_id=stripe_subscription['id']))

            print("Subscription re-sync complete.")
Esempio n. 9
0
    def check_trace(self, step_method):
        """Check that a step method reproduces the trace recorded on master.

        A change to how random numbers are drawn can alter the trace; in that
        case `master_samples` has to be refreshed, and the change should be
        noted and justified in the commit.

        The same method doubles as a benchmark across commits: set the
        ``BENCHMARK`` environment variable to a sample count other than 100,
        for example

        ```
        BENCHMARK=100000 ./scripts/test.sh -s pymc3/tests/test_step.py:TestStepMethods
        ```

        and run it on each commit. The comparison against `master_samples`
        is skipped while benchmarking.
        """
        default_steps = 100
        n_steps = int(os.getenv('BENCHMARK', default_steps))
        is_benchmark = n_steps != default_steps
        method_name = step_method.__name__

        if is_benchmark:
            banner = 'Benchmarking {} with {:,d} samples'.format(method_name, n_steps)
        else:
            banner = 'Checking {} has same trace as on master'.format(method_name)
        tqdm.write(banner)

        with Model():
            Normal('x', mu=0, sd=1)
            trace = sample(n_steps, step=step_method(), random_seed=1)

        if is_benchmark:
            return
        assert_array_almost_equal(trace.get_values('x'), self.master_samples[step_method])
Esempio n. 10
0
 def check_alignments(self, filename):
     """ Return the result of ``self.faces.have_face(filename)``.

     A falsy result is reported with a "skipping" notice that names the
     file's basename.
     """
     found = self.faces.have_face(filename)
     if found:
         return found
     notice = "No alignment found for {}, skipping".format(
         os.path.basename(filename))
     tqdm.write(notice)
     return found
Esempio n. 11
0
def mal(mal_title, mal_id=False):
    """Search MyAnimeList for ``mal_title`` and return the match as a ``MalResult``.

    :param mal_title: title sent as the ``q`` parameter of the search
    :param mal_id: when not ``False``, the first ``//entry`` of the response
        that contains this value is used instead of the whole response
    :return: ``MalResult`` built from the fields extracted from the response;
        ``status``, ``synopsys`` and ``resumen`` are passed through
        ``translate(..., 'es')``
    :raises AssertionError: when no ``//id`` value could be extracted
    """
    # SECURITY NOTE(review): the session cookie and the account credentials
    # below are hard-coded in the source; they should come from configuration.
    cookies = {"incap_ses_224_81958":"P6tYbUr7VH9V6shgudAbA1g5FVYAAAAAyt7eDF9npLc6I7roc0UIEQ=="}
    response = requests.get(
        "http://myanimelist.net/api/anime/search.xml",
        params={'q':mal_title},
        cookies=cookies,
        auth=("zodman1","zxczxc"),
        headers = {'User-Agent':'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36'})
    content = response.content
    if mal_id is not False:
        # Narrow the response down to the entry that mentions the wanted id
        for entry in xpath.search(content, "//entry"):
            if mal_id in entry:
                content = entry
                break

    tqdm.write("%s %s"%((mal_title,), mal_id))
    entry_id = xpath.get(content, "//id")
    title = xpath.get(content, "//title")
    title_en = xpath.get(content, "//english")
    type_ = xpath.get(content, "//type")
    synonyms = xpath.get(content, "//synonyms")
    status = xpath.get(content, "//status")
    synopsys = translate(xpath.get(content, "//synopsis"),"es")
    img = xpath.get(content, "//image")
    episodes = xpath.get(content, "//episodes")
    resumen = synopsys.replace("<br />", " ").replace("\n\r","")
    # NOTE(review): `synopsys` was already translated above, so this translates
    # the text a second time -- confirm whether that is intended.
    resumen = translate(resumen,'es')
    status = translate(status,'es')
    # Compare by value: `is not ""` tests object identity, which is not a
    # reliable emptiness check. Raised explicitly so the check also runs
    # under `python -O`, while callers still see an AssertionError.
    if entry_id == "":
        raise AssertionError(mal_title)

    data = dict(title=title, title_en=title_en, type=type_, status=status,
                resumen=resumen, img=img, episodes=episodes, synonyms=synonyms,
                id=entry_id, synopsys=synopsys)
    return MalResult(**data)
Esempio n. 12
0
    def handle_single(self, filename, verbosity, remove):
        """Report, and optionally delete, ``Word`` rows matched by several spellings.

        Every ``./ficha`` element of the XML file is expanded with
        ``ImportSM.work_on_ficha``; each resulting spelling is looked up as a
        ``Word``. A ``Word`` reached through more than one distinct spelling
        is treated as a duplicate and listed with the fichas that hit it.

        :param filename: path of the XML file to inspect
        :param verbosity: accepted for interface compatibility; not used here
        :param remove: when true, every duplicated ``Word`` is deleted
        """
        tqdm.write("Work on {!r}".format(filename))
        basename = os.path.splitext(os.path.basename(filename))[0]
        tree = ET.parse(filename)
        root = tree.getroot()

        # Word pk -> distinct spellings that matched it and the fichas involved
        words = defaultdict(lambda: {'words': set(), 'fichas': list()})

        fichas = root.findall('./ficha')
        for ficha in tqdm(fichas, desc=basename, leave=False):
            lemma = ''.join(ficha.find('./lema').itertext()).strip()

            data = ImportSM.work_on_ficha(ficha)
            data = [it[0] for it in data]
            for it in data:
                try:
                    w = Word.objects.get(word=it.encode('utf-8'))
                    words[w.pk]['words'].add(it)
                    words[w.pk]['fichas'].append((filename, ficha.attrib['ID'], ficha))
                except Word.DoesNotExist:
                    tqdm.write("not found {}".format(lemma))

        # Detect duplicates!
        dupes = {k: v for k, v in words.items() if len(v['words']) > 1}
        if dupes:
            # Only announce the removal when it is actually going to happen
            remove_msg = " (will be removed!)" if remove else ""
            tqdm.write("...found {} duplicates{}".format(len(dupes), remove_msg))
            for w, values in dupes.items():
                word = Word.objects.get(pk=w)
                tqdm.write(" - pk: {!r}: {}".format(w, word))
                for ficha in values['fichas']:
                    tqdm.write("   + {}: [ID={!r}] {}".format(ficha[0], ficha[1], ''.join(ficha[2].find('./lema').itertext()).strip()))

                if remove:
                    word.delete()
Esempio n. 13
0
 def check_alignments(self, frame):
     """ Return the result of ``self.alignments.frame_exists(frame)``.

     A falsy result is reported with a "skipping" notice naming the frame.
     """
     found = self.alignments.frame_exists(frame)
     if found:
         return found
     notice = "No alignment found for {}, skipping".format(frame)
     tqdm.write(notice)
     return found
Esempio n. 14
0
File: log.py Progetto: wmayner/pyphi
 def emit(self, record):
     """ Format ``record`` and write it to ``self.stream`` via ``tqdm.write``.

     The stream is flushed after the write; any ``Exception`` raised on the
     way is handed to ``self.handleError``.
     """
     try:
         formatted = self.format(record)
         tqdm.write(formatted, file=self.stream, end=self.terminator)
         self.flush()
     except Exception:  # pylint: disable=broad-except
         self.handleError(record)
def get_exec_times(graph):
    """Return ``get_costs`` of the most recently modified report for ``graph``.

    Reports are the ``*.csv`` files in the current directory whose name
    contains the graph's basename (extension stripped).
    """
    # Get execution times for reports (-m option)
    stem = os.path.splitext(os.path.basename(graph))[0]
    newest_first = sorted(
        glob.glob("*" + stem + "*.csv"),
        key=os.path.getmtime,
        reverse=True,
    )
    csvfile = newest_first[0]
    tqdm.write("Retrieving monitoring info from "+ csvfile)
    return get_costs(csvfile)
Esempio n. 16
0
 def train(self, episodes=500, max_step=200):
     """ Run ``episodes`` iterations; every 50th one is a test run instead.

     Test iterations call ``self._test_impl`` and write the reward it
     returns; all others call ``self._train_impl``.
     """
     test_interval = 50
     for episode in tqdm(range(episodes)):
         if episode % test_interval:
             # train step
             self._train_impl(max_step)
             continue
         total_reward = self._test_impl(max_step, delay=0, gui=False)
         tqdm.write('current reward: {total_reward}'.format(total_reward=total_reward))
Esempio n. 17
0
def check_am_session(src='/home/erp/src', repository='erp', sudo_user='******'):
    """Abort when ``.git/rebase-apply`` can be listed inside the repository.

    ``ls`` succeeding on that path is taken to mean a ``git am`` session is
    still in progress.
    """
    repository_path = "{}/{}".format(src, repository)
    with settings(hide('everything'), sudo_user=sudo_user, warn_only=True):
        with cd(repository_path):
            listing = sudo("ls .git/rebase-apply")
            if listing.return_code:
                return
            message = "The repository is in the middle of an am session!"
            tqdm.write(colors.red(message))
            abort(message)
Esempio n. 18
0
def check_is_rolling(src='/home/erp/src', repository='erp', sudo_user='******'):
    """Abort unless ``git branch`` lists ``* rolling`` in the repository.

    The check is the exit code of ``git branch | grep '* rolling'`` run
    through ``sudo`` inside ``<src>/<repository>``.
    """
    repository_path = "{}/{}".format(src, repository)
    with settings(hide('everything'), sudo_user=sudo_user, warn_only=True):
        with cd(repository_path):
            branch_check = sudo("git branch | grep '* rolling'")
            if not branch_check.return_code:
                return
            message = "The repository is not in rolling mode"
            tqdm.write(colors.red(message))
            abort(message)
Esempio n. 19
0
 def _saveSession(self, sess):
     """ Persist the model parameters, then the session variables
     Args:
         sess: the current session
     """
     tqdm.write("Checkpoint reached: saving model (don't stop the run)...")
     self.saveModelParams()
     checkpoint_name = self._getModelName()
     # TODO: Put a limit size (ex: 3GB for the modelDir)
     self.saver.save(sess, checkpoint_name)
     tqdm.write("Model saved.")
Esempio n. 20
0
 def _write(self, msg):
     """
     Write error messages to the progress bar, if using one,
     otherwise to stderr
     """
     if self._progbar:
         tqdm.write(msg)
     else:
         print(msg, file= sys.stderr)
Esempio n. 21
0
    def _evaluate(self, sess, dataset_name, return_extras=False):
        """Predict every chunk of ``dataset_name`` and return its metrics.

        Predictions are written into an ``int32`` array at the offset yielded
        with each chunk by ``self.dataset.traverse_dataset``; the filled array
        is compared with the dataset labels by ``self.get_metrics``.
        """
        y_pred = np.zeros(self.dataset.num_examples(dataset_name), dtype=np.int32)

        tqdm.write('Running evaluation for dataset %s' % dataset_name, file=sys.stderr)
        chunks = self.dataset.traverse_dataset(dataset_name, self.batch_size)
        for offset, chunk in chunks:
            chunk_pred = self.predict(sess, chunk)
            # Clamp the slice end to the dataset size
            stop = min(offset + self.batch_size, self.dataset.num_examples(dataset_name))
            y_pred[offset:stop] = chunk_pred

        y_true = self.dataset.dataset_labels(dataset_name, self.cl_iteration)
        return self.get_metrics(y_true, y_pred, return_extras=return_extras)
Esempio n. 22
0
def illumination_corrections(image_to_correct, correction_image, corrected_image,do_correction=True):
    """Divide an image by its illumination correction with IRAF ``imarith``.

    The divisor is ``odi.skyflatpath + correction_image`` when
    ``do_correction`` equals ``True``; otherwise it is ``1.0`` and a notice is
    written. The result goes to ``odi.illcorpath + corrected_image``.
    """
    iraf.unlearn(iraf.imutil.imarith,iraf.imfilter.median)
    for param, value in (('operand1', image_to_correct), ('op', '/')):
        iraf.imutil.imarith.setParam(param, value)

    # Equality with True is kept deliberately: a truthy value that does not
    # compare equal to True must still take the "no correction" branch.
    if do_correction == True:  # noqa: E712
        divisor = odi.skyflatpath+correction_image
    else:
        tqdm.write('not applying illcor')
        divisor = 1.0
    iraf.imutil.imarith.setParam('operand2', divisor)

    iraf.imutil.imarith.setParam('result',odi.illcorpath+corrected_image)
    iraf.imutil.imarith(mode='h')
Esempio n. 23
0
    def mainTrain(self, sess):
        """ Training loop

        Runs ``self.args.numEpochs`` epochs over the batches returned by
        ``self.textData.getBatches()``. Each batch is one training step: a
        summary is written, ``self.globStep`` is incremented, the status is
        printed every 100 steps and a checkpoint is saved every
        ``self.args.saveEvery`` steps. The model is saved once more when the
        loop ends, whether it completed or was interrupted.

        Args:
            sess: The current running session
        """

        # Specific training dependent loading

        self.textData.makeLighter(self.args.ratioDataset)  # Limit the number of training samples

        mergedSummaries = tf.summary.merge_all()  # Define the summary operator (Warning: Won't appear on the tensorboard graph)
        if self.globStep == 0:  # Not restoring from previous run
            self.writer.add_graph(sess.graph)  # First time only

        # If restoring a model, restore the progression bar ? and current batch ?

        print('Start training (press Ctrl+C to save and exit)...')

        try:  # If the user exits while training, we still try to save the model
            for e in range(self.args.numEpochs):

                print()
                print("----- Epoch {}/{} ; (lr={}) -----".format(e+1, self.args.numEpochs, self.args.learningRate))

                batches = self.textData.getBatches()

                # TODO: Also update learning parameters eventually

                tic = datetime.datetime.now()  # Start time of the epoch
                for nextBatch in tqdm(batches, desc="Training"):
                    # Training pass
                    ops, feedDict = self.model.step(nextBatch)
                    assert len(ops) == 2  # training, loss
                    # The summary op is appended so that one run() call returns all three values
                    _, loss, summary = sess.run(ops + (mergedSummaries,), feedDict)
                    self.writer.add_summary(summary, self.globStep)
                    self.globStep += 1

                    # Output training status
                    if self.globStep % 100 == 0:
                        # exp() is skipped for losses of 300 and above; infinity is reported instead
                        perplexity = math.exp(float(loss)) if loss < 300 else float("inf")
                        tqdm.write("----- Step %d -- Loss %.2f -- Perplexity %.2f" % (self.globStep, loss, perplexity))

                    # Checkpoint
                    if self.globStep % self.args.saveEvery == 0:
                        self._saveSession(sess)

                toc = datetime.datetime.now()

                print("Epoch finished in {}".format(toc-tic))  # Prints the timedelta between the two timestamps as-is
        except (KeyboardInterrupt, SystemExit):  # If the user presses Ctrl+C while training is in progress
            print('Interruption detected, exiting the program...')

        self._saveSession(sess)  # Ultimate saving before complete exit
Esempio n. 24
0
 def _saveSession(self, sess):
     """ Persist the model parameters, then the session variables
     Args:
         sess: the current session
     """
     tqdm.write("Checkpoint reached: saving model (don't stop the run)...")
     self.saveModelParams()
     checkpoint_name = self._getModelName()
     marker_text = 'This file is used internally by DeepQA to check the model existance. Please do not remove.\n'
     # HACK: Simulate the old model existence to avoid rewriting the file parser
     with open(checkpoint_name, 'w') as marker_file:
         marker_file.write(marker_text)
     # TODO: Put a limit size (ex: 3GB for the modelDir)
     self.saver.save(sess, checkpoint_name)
     tqdm.write("Model saved.")
Esempio n. 25
0
    def handle(self, *args, **kwargs):
        """Import the torrents of the configured nyaa users and attach MAL metadata.

        First every result page of every user in ``NYAA_USERS`` is collected.
        Then, for each result whose title contains none of the ``BYPASS``
        markers, the ``Torrent`` row is created if missing, the title is
        parsed with ``guessit`` and looked up through ``mal``; on success the
        ``Anime``, ``ReleaseGroup``, ``MALMeta`` and ``MetaTorrent`` rows are
        created or updated. Results whose lookup fails are reported and
        skipped.
        """
        results = []
        for fansub, users in tqdm(NYAA_USERS.items()):
            Fansub.objects.get_or_create(name=fansub)
            for user in users:
                # Walk the result pages until an empty one is returned
                offset = 1
                while True:
                    results_ = nyaa.search(user=user, offset=offset)
                    if not results_:
                        break
                    offset += 1
                    results += results_

        for res in tqdm(results):
            lowered_title = res.title.lower()
            if any(marker.lower() in lowered_title for marker in BYPASS):
                continue
            date = make_aware(res.date, is_dst=False)
            torrent, created = Torrent.objects.get_or_create(
                full=res.title,
                url=res.link.replace("download", "view"),
                defaults=dict(download_url=res.link.replace("view", "download"),
                              date=date))
            tqdm.write("%s %s " % (res.title, res.link ))
            full = res.title
            data = guessit.guessit(full, {"episode_prefer_number":True, 'expected_group':RELEASE_GROUPS})
            title = data.get("title")
            kwargs_ = {}
            if title in MAL_ANIMES:
                # Known titles are mapped to a search title plus an explicit MAL id
                search_title, mal_id = MAL_ANIMES[title]
                title = search_title
                kwargs_ = {'mal_id': mal_id}
            try:
                mal_data = mal(title, **kwargs_)
            except Exception as err:
                # Best effort: a failed lookup skips this torrent only. Catching
                # Exception (not a bare except) keeps Ctrl+C and SystemExit working.
                tqdm.write("MAL lookup failed for %r: %r" % (title, err))
                continue
            anime, _ = Anime.objects.get_or_create(slug=slugify(data.get("title")), defaults={"title":data.get("title")})
            release_group, _ = ReleaseGroup.objects.get_or_create(name=data.get("release_group"))
            mal_obj, _ = MALMeta.objects.get_or_create(mal_id=mal_data.id)
            mal_obj.title = mal_data.title
            mal_obj.image = mal_data.img
            mal_obj.synopsys = mal_data.synopsys
            mal_obj.resumen = mal_data.resumen
            mal_obj.synonyms = mal_data.synonyms
            mal_obj.title_en = mal_data.title_en
            mal_obj.status = mal_data.status
            mal_obj.save()
            meta, _ = MetaTorrent.objects.get_or_create(
                anime=anime, torrent=torrent, release_group=release_group, mal=mal_obj)
            meta.episode = data.get("episode", data.get("episode_title"))
            meta.format = data.get("format", data.get("screen_size"))
            meta.save()
Esempio n. 26
0
 def add_files(self, filenames):
   """Return the index entries for ``filenames``, adding unknown files.

   Paths are made absolute first. A path already in ``self._names`` goes
   through ``self._update_if_required``; any other gets a new entry from
   ``entry_for_file`` that is passed to ``self._process_entries``.
   """
   indexed = []
   absolute_paths = [os.path.abspath(name) for name in filenames]
   # Look for these files in the index
   for path in tqdm(absolute_paths, leave=False):
     tqdm.write("Indexing {}".format(path))
     if path not in self._names:
       logger.debug("Adding new file to index: {}".format(path))
       entry = entry_for_file(path)
       self._process_entries([entry])
     else:
       entry = self._update_if_required(path)
     indexed.append(entry)
   return indexed
Esempio n. 27
0
def dir_bruter(word_queue, target_url, stime, extensions=None, pbar=None):
    """Request every queued word (plus extension variants) below ``target_url``.

    Each word taken from ``word_queue`` is tried as-is and, when
    ``extensions`` is given, once per extension. Responses and HTTP errors
    whose status is not 404 are written through ``tqdm.write``.

    :param word_queue: queue of path fragments to try
    :param target_url: base URL each quoted fragment is appended to
    :param stime: seconds to sleep after each request that got a response
    :param extensions: optional iterable of suffixes; ``.swp`` is requested
        as ``/.<word>.swp``
    :param pbar: optional progress bar advanced by one per word
    """
    while not word_queue.empty():

        # pbar defaults to None, so only advance it when one was supplied
        if pbar is not None:
            pbar.update(1)

        attempt = word_queue.get()

        # The word itself is always tried unchanged
        attempt_list = ["%s" % attempt]

        # Add one variant per extension when extensions are being brute-forced
        if extensions:
            for extension in extensions:
                if extension == ".swp":
                    attempt_list.append("/.%s%s" % (attempt.strip('/'), extension))
                else:
                    attempt_list.append("%s%s" % (attempt, extension))

        # Iterate over the list of paths we want to try
        for brute in attempt_list:

            url = "%s%s" % (target_url, urllib.quote(brute))
            headers = {"User-Agent": conf['ua']}
            request = urllib2.Request(url, headers=headers)
            try:
                response = urllib2.urlopen(request, timeout=2)
            except urllib2.HTTPError as e:
                # HTTPError has to be handled before the generic handler:
                # otherwise every 403/500 is logged as a time-out and the
                # status code is never reported.
                if e.code != 404:
                    tqdm.write("!!! %d => %s" % (e.code, url))
                continue
            except Exception:
                # Time-outs and connection failures; skip so the scan cannot hang
                logger.error("Time out...")
                continue

            # Sleep after the request has completed
            time.sleep(stime)

            if response.code != 404:
                logger.info("Get !!!!" + url)
                tqdm.write("[%d] => %s" % (response.code, url))
Esempio n. 28
0
    def emit(self, record):
        """Format ``record``, align continuation lines, colourise and write it.

        Newlines are followed by ``_`` and padding so that continuation lines
        line up under the message part; then each ``(regex, color)`` pair of
        ``self.color_subst`` wraps its first group in that colour.
        """
        text = self.format(record)

        # Handle logging on several lines
        padding = ' ' * (len(text) - len(record.message) - 1)
        text = text.replace('\n', '\n_' + padding)

        # Add color
        for pattern, color in self.color_subst:
            replacement = color + r'\1' + Fore.RESET + Style.RESET_ALL
            text = re.sub(pattern, replacement, text)

        tqdm.write(text)
Esempio n. 29
0
    def _crawl(self, url: URL, save: bool = True) -> Any:
        """Fetch ``url`` as JSON and optionally store a copy under ``self._dest_dir``.

        Returns the decoded data, or ``None`` (after writing a notice) when
        the response body is not valid JSON. When ``save`` is true the data is
        dumped with sorted keys, every occurrence of ``str(self._src_url)`` is
        removed, and the text is written to ``<url path>/index.json``.
        """
        try:
            response = requests.get(url)
            data = response.json()
        except json.JSONDecodeError:
            tqdm.write(f"JSON decode failure: {url}")
            return None

        if not save:
            return data

        serialized = json.dumps(data, indent=4, sort_keys=True)
        serialized = serialized.replace(str(self._src_url), "")
        # [1:] drops the first character of the path before joining
        target = self._dest_dir.joinpath((url / "index.json").path[1:])
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(serialized)
        return data
Esempio n. 30
0
def sync_charges(apps, schema_editor):
    """Re-sync every stored Charge from Stripe under API version 2016-03-07.

    Does nothing when there are no charges. A charge whose sync raises
    ``InvalidRequestError`` is reported through ``tqdm.write`` and skipped.
    """
    # Importing the real models is acceptable: this is a forward-only migration.
    from djstripe.models import Charge

    from djstripe.context_managers import stripe_temporary_api_version

    with stripe_temporary_api_version("2016-03-07"):
        if not Charge.objects.count():
            return

        print("syncing charges. This may take a while.")

        progress = tqdm(Charge.objects.all(), desc="Sync", unit=" charges")
        for charge in progress:
            try:
                stripe_data = charge.api_retrieve()
                Charge.sync_from_stripe_data(stripe_data)
            except InvalidRequestError:
                message = "There was an error while syncing charge ({charge_id}).".format(
                    charge_id=charge.stripe_id)
                tqdm.write(message)

        print("Charge sync complete.")
 def _log_epoch(self, engine):
     """Print the epoch's average loss and reset the per-epoch counters."""
     self.pbar.refresh()
     average_loss = self.running_loss / self.n_batches
     summary = "Epoch: {} - avg loss: {:.5f}".format(engine.state.epoch,
                                                     average_loss)
     tqdm.write(summary)
     # Start the next epoch from a zeroed loss accumulator and progress bar.
     self.running_loss = 0
     self.pbar.n = 0
     self.pbar.last_print_n = 0
Esempio n. 32
0
# Script fragment: builds a tensorx model on top of `logits`, `loss_inputs`,
# `in_layer` and `vocab_size` (all defined earlier in the script), then trains
# it on a two-sample toy data set while printing the loss periodically.

# Output layer: tx.softmax applied to the logits layer.
out = tx.Activation(logits, tx.softmax)

# Targets are expanded with dense_one_hot over vocab_size and compared with
# the raw logits; the loss is the mean of the categorical cross entropy.
labels = tx.dense_one_hot(loss_inputs.tensor, vocab_size)
loss = tf.reduce_mean(tx.categorical_cross_entropy(labels=labels, logits=logits.tensor))

# setup optimizer
optimizer = tx.AMSGrad(learning_rate=0.01)

# The same loss tensor serves as training loss and as evaluation score.
model = tx.Model(run_inputs=in_layer, run_outputs=out,
                 train_inputs=in_layer, train_outputs=out,
                 train_in_loss=loss_inputs, train_out_loss=loss,
                 eval_out_score=loss, eval_in_score=loss_inputs)

print(model.feedable_train())

runner = tx.ModelRunner(model)
runner.config_optimizer(optimizer)

runner.init_vars()

# need to fix the runner interface to allow for lists to be received
data = np.array([[0, 1], [1, 0]])
targets = np.array([[2], [3]])

for i in tqdm(range(10000)):
    runner.train(model_input_data=data, loss_input_data=targets)

    # Report the evaluation score every 1000 steps.
    if i % 1000 == 0:
        # NOTE(review): this rebinds the module-level name `loss`, which held
        # the loss tensor above; the model was already built from that tensor.
        loss = runner.eval(data, targets)
        tqdm.write("loss: {}".format(loss))
Esempio n. 33
0
File: lttc.py Progetto: uhh-lt/lttc
    def pipeline(self, args):
        """Train for ``args.epochs`` epochs, evaluating and checkpointing each one.

        Every epoch runs one training pass, re-evaluates the training set and,
        when ``self.pargs.testset`` is truthy, evaluates the test set
        (otherwise the training-set results stand in for the test results).
        Whenever the score named by ``self.pargs.best_run_test_valname``
        improves, the model and its predictions are saved. A ``-final`` copy
        is saved after the last epoch.

        Args:
            args: run configuration. This method reads ``l1reg``, ``epochs``,
                ``status_reports``, ``batch_size`` and ``model`` from it and
                overwrites ``status_reports``.

        NOTE(review): if ``args.epochs < 1`` the epoch loop never runs and the
        final save refers to names that were never bound -- confirm callers
        always pass at least one epoch.
        """

        def evaluate(args, dloader):
            """Run the model over *dloader* without gradients and collect results."""
            model = self.pargs.modelinstance
            # Turn on evaluation mode which disables dropout.
            model.eval()
            test_loss_batch = torch.zeros(len(dloader))
            ids = []
            predictions = []
            logprobs = []
            targets = []

            with torch.no_grad():
                for batch_i, batch_data in enumerate(
                        tqdm(dloader, ncols=89, desc='Test ')):
                    loss, (sampleids, outputs, predictions_,
                           targets_) = process(batch_data, istraining=False)
                    if args.l1reg > 0:
                        reg_loss = l1reg(model)
                        loss += args.l1reg * reg_loss
                    # keep track of some scores
                    test_loss_batch[batch_i] = loss.item()
                    ids.extend(sampleids.tolist())
                    logprobs.extend(outputs.data.tolist())
                    predictions.extend(predictions_.tolist())
                    targets.extend(targets_.tolist())
            test_loss = test_loss_batch.mean()
            return test_loss, ids, logprobs, predictions, targets, test_loss_batch

        def l1reg(model):
            """Return the summed absolute parameter values divided by the parameter count."""
            reg_loss = 0
            n_params = 0
            for n_params, param in enumerate(model.parameters(), start=1):
                if param is None:
                    continue
                # reduction='sum' is the supported spelling of the deprecated
                # size_average=False argument.
                reg_loss += torch.functional.F.l1_loss(
                    param, target=torch.zeros_like(param), reduction='sum')
            # A model without parameters contributes no penalty (and must not
            # divide by zero).
            if n_params:
                reg_loss /= n_params
            return reg_loss

        def train(args):
            """Run one training pass; ``epoch`` is read from the enclosing loop."""
            model = self.pargs.modelinstance
            # Turn on training mode which enables dropout.
            model.train()

            train_loss_batch = torch.zeros(len(self.pargs.trainloader))
            sample_i = 0
            report_i = 0
            report_interval_begin_sample = 0
            report_interval_begin_batch = 0
            predictions = []
            targets = []

            for batch_i, batch_data in enumerate(
                    tqdm(self.pargs.trainloader, ncols=89, desc='Train')):
                batch_start_time = time.time()
                model.zero_grad()
                loss, (_, outputs, batch_predictions,
                       batch_targets) = process(batch_data, istraining=True)
                if args.l1reg > 0:
                    reg_loss = l1reg(model)
                    loss += args.l1reg * reg_loss
                loss.backward()
                self.pargs.modeloptimizingscheduler.step()
                # track some scores
                train_loss_batch[batch_i] = loss.item()
                predictions.extend(batch_predictions.tolist())
                targets.extend(batch_targets.tolist())
                sample_i += batch_targets.size(0)

                # Emit a status report once at least report_after_n_samples
                # samples were seen since the previous report.
                if ((sample_i - report_interval_begin_sample) //
                        self.pargs.report_after_n_samples) > 0:
                    cur_loss = train_loss_batch[report_interval_begin_batch:(
                        batch_i + 1)].mean()
                    cur_scores = self.getscores(
                        targets[report_interval_begin_sample:],
                        predictions[report_interval_begin_sample:])
                    cum_scores = self.getscores(targets, predictions)
                    tqdm.write(
                        self.message_status_interval(
                            '*** training status ***', report_i + 1,
                            args.status_reports, epoch, args.epochs,
                            report_interval_begin_batch, batch_i + 1,
                            self.pargs.ntrainbatches,
                            args.batch_size, report_interval_begin_sample,
                            len(targets), self.pargs.ntrainsamples,
                            batch_start_time, cur_loss,
                            train_loss_batch.mean(), cur_scores, cum_scores))
                    report_interval_begin_sample = len(targets)
                    report_interval_begin_batch = batch_i + 1
                    report_i += 1

            train_loss = train_loss_batch.mean()
            return train_loss, predictions, targets, train_loss_batch

        ###
        # Run pipeline
        ###
        best_run = utils.AttributeHolder(test_val=float('-inf'), epoch=0)
        # Never request more status reports than there are training batches.
        args.status_reports = min(args.status_reports,
                                  self.pargs.ntrainbatches)
        self.pargs.report_after_n_samples = math.ceil(
            self.pargs.ntrainsamples / (args.status_reports + 1))
        # `process` and `epoch` are picked up by the nested helpers as closures.
        process = self.pargs.modelprocessfun
        for epoch in tqdm(range(1, args.epochs + 1), ncols=89, desc='Epochs'):
            epoch_start_time = time.time()
            train_loss_cum, train_predictions_cum, train_targets_cum, _ = train(
                args)
            train_scores_cum = self.getscores(train_targets_cum,
                                              train_predictions_cum)

            # test training set
            train_loss, train_sampleids, train_logprobs, train_predictions, train_targets, _ = evaluate(
                args, self.pargs.trainloader)
            train_scores = self.getscores(train_targets, train_predictions)
            # test test set
            if self.pargs.testset:
                test_loss, test_sampleids, test_logprobs, test_predictions, test_targets, _ = evaluate(
                    args, self.pargs.testloader)
                test_scores = self.getscores(test_targets,
                                             test_predictions,
                                             extended=True)
            else:
                # No test set: reuse the training-set evaluation.
                test_loss, test_sampleids, test_logprobs, test_predictions, test_targets = train_loss, train_sampleids, train_logprobs, train_predictions, train_targets
                test_scores = train_scores

            # print scores
            status_message = self.message_status_endepoch(
                '', epoch, epoch_start_time,
                self.pargs.modeloptimizer.getLearningRate(), train_loss,
                test_loss, train_scores, test_scores, best_run)
            tqdm.write(status_message)
            # Checkpoint whenever the tracked test score improves.
            if best_run.test_val < test_scores[
                    self.pargs.best_run_test_valname]:
                tqdm.write(
                    f'''  > Saving model and prediction results to '{args.model:s}'...'''
                )
                self.savemodel(args, epoch, status_message, suffix='')
                self.savepredictions(args,
                                     test_sampleids,
                                     test_logprobs,
                                     test_predictions,
                                     test_targets,
                                     test_scores,
                                     suffix='')
                best_run.test_valname = self.pargs.best_run_test_valname
                best_run.test_val = test_scores[best_run.test_valname]
                best_run.epoch = epoch
                best_run.train_scores_cum = train_scores_cum
                best_run.train_scores = train_scores
                best_run.test_scores = test_scores
                best_run.train_loss = train_loss
                best_run.test_loss = test_loss
                tqdm.write('  > ... Finished saving\n  |')
        # save final model and scores
        tqdm.write(
            f'''  > Saving final model and prediction results to '{args.model:s}'...'''
        )
        self.savemodel(args, epoch, status_message, suffix='-final')
        self.savepredictions(args,
                             test_sampleids,
                             test_logprobs,
                             test_predictions,
                             test_targets,
                             test_scores,
                             suffix='-final')
        tqdm.write('  > ... Finished saving\n  |')
Esempio n. 34
0
            val_acc /= len(dev.dataset)
            writer.add_scalars('loss', {
                'train': train_loss / (step + 1),
                'val': val_loss
            },
                               epoch * len(train) + step)
            model.train()

    train_loss /= (step + 1)
    train_acc /= (step + 1)

    tr_summary = {'loss': train_loss, 'acc': train_acc}
    val_summary = {'loss': val_loss, 'acc': val_acc}
    scheduler.step(val_summary['loss'])
    tqdm.write('epoch : {}, tr_loss: {:.3f}, val_loss: '
               '{:.3f}, tr_acc: {:.2%}, val_acc: {:.2%}'.format(
                   epoch + 1, tr_summary['loss'], val_summary['loss'],
                   tr_summary['acc'], val_summary['acc']))

    val_loss = val_summary['loss']
    is_best = val_loss < best_val_loss

    if is_best:
        state = {
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'opt_state_dict': optimizer.state_dict()
        }
        summary = {'train': tr_summary, 'validation': val_summary}

        sm.update(summary)
        sm.save('summary.json')
Esempio n. 35
0
if __name__ == "__main__":
    # Command-line entry point: parse the arguments, derive the output file
    # name from --flag and the distance range, and run eval_tracks.

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('gt_map', help='groundtruth seqmap')
    parser.add_argument('gt_folder', help='groundtruth label folder')
    parser.add_argument('res_folder', help='results folder')
    parser.add_argument("--category",
                        nargs="+",
                        default=["Car"],
                        choices=["Car", "Pedestrian"])
    # NOTE(review): --flag defaults to None, which yields a file name starting
    # with "None_" -- confirm whether that is intended.
    parser.add_argument("--flag", type=str)
    parser.add_argument("--d_min", type=float, default=0)
    # Optional, falling back to 100: marking the option as required would make
    # its declared default unreachable.
    parser.add_argument("--d_max", type=float, default=100)

    args = parser.parse_args()
    tqdm.write(f"args = {args}")

    out_filename = os.path.join(
        args.res_folder,
        f"{args.flag}_{int(args.d_min)}_{int(args.d_max)}.txt")
    tqdm.write(f"output file name = {out_filename}")

    eval_tracks(
        args.gt_map,
        args.gt_folder,
        args.res_folder,
        args.category,
        args.d_min,
        args.d_max,
        out_filename,
    )
Esempio n. 36
0
def evaluate(gt_datas: dict, pd_datas: dict, d_min: float, d_max: float,
             out_file: TextIO) -> None:
    """Evaluate tracking output and write one summary line to *out_file*.

    Ground truth and predictions are walked in lockstep, log by log and frame
    by frame. For every frame, pairwise distances between ground-truth and
    predicted entries are computed with three measures ("centroid", "iou",
    "orientation") and fed into three separate MOT accumulators.

    Args:
        gt_datas: ground-truth logs keyed by log id; each value holds a
            'frames' mapping whose values carry an 'annotations' entry
        pd_datas: tracker output with the same layout and, as asserted below,
            the same number of logs, log ids and frames per log
        d_min: minimum distance range, forwarded to create_entry
        d_max: maximum distance range, forwarded to create_entry
        out_file: output file object receiving the space-separated summary

    Raises:
        AssertionError: if the two inputs differ in log count, log ids or
            frame count per log.
    """
    # One accumulator per distance measure: centroid, IoU, orientation.
    acc_c = mm.MOTAccumulator(auto_id=True)
    acc_i = mm.MOTAccumulator(auto_id=True)
    acc_o = mm.MOTAccumulator(auto_id=True)

    # NOTE(review): annotated as List[str], but the loop appends lists of ids
    # and the name is later rebound to a numpy array.
    ID_gt_all: List[str] = []

    # count_all counts every (ground truth, track) pair seen; fr_count counts
    # the predicted frames seen so far.
    count_all: int = 0
    fr_count: int = 0

    tqdm.write(f"{len(pd_datas)} {len(gt_datas)}")
    assert len(pd_datas) == len(gt_datas)

    pbar = tqdm(zip(pd_datas.items(), gt_datas.items()), total=len(gt_datas))
    for (log_id_pd, pd_data), (log_id_gt, gt_data) in pbar:
        fr_count += len(pd_data['frames'])
        pbar.set_postfix_str(s=f"Logs: {log_id_gt} AccumFrames: {fr_count} | "
                             f"PD: {len(pd_data['frames'])} "
                             f"GT: {len(gt_data['frames'])}]")

        assert len(pd_data['frames']) == len(gt_data['frames'])
        assert log_id_pd == log_id_gt

        for (_, hypos), (_, annos) in \
                zip(pd_data['frames'].items(), gt_data['frames'].items()):

            # Get entries in GT and PD
            gt, id_gts = create_entry(annos['annotations'], d_min, d_max)
            tracks, id_tracks = create_entry(hypos['annotations'], d_min,
                                             d_max)

            ID_gt_all.append(np.unique(id_gts).tolist())

            # Distance matrices: one row per ground-truth entry, one column
            # per predicted track.
            dists_c: List[List[float]] = []
            dists_i: List[List[float]] = []
            dists_o: List[List[float]] = []
            for _, gt_value in gt.items():
                gt_track_data_c: List[float] = []
                gt_track_data_i: List[float] = []
                gt_track_data_o: List[float] = []
                # The (still empty) rows are appended first and filled in place.
                dists_c.append(gt_track_data_c)
                dists_i.append(gt_track_data_i)
                dists_o.append(gt_track_data_o)
                for _, track_value in tracks.items():
                    count_all += 1
                    gt_track_data_c.append(
                        get_distance(gt_value, track_value, "centroid"))
                    gt_track_data_i.append(
                        get_distance(gt_value, track_value, "iou"))
                    gt_track_data_o.append(
                        get_distance(gt_value, track_value, "orientation"))

            acc_c.update(id_gts, id_tracks, dists_c)
            acc_i.update(id_gts, id_tracks, dists_i)
            acc_o.update(id_gts, id_tracks, dists_o)

    # Flatten the per-frame id lists into the unique ground-truth ids.
    ID_gt_all = np.unique([item for lists in ID_gt_all for item in lists])

    if count_all == 0:
        # fix for when all hypothesis is empty,
        # pymotmetric currently doesn't support this, see https://github.com/cheind/py-motmetrics/issues/49
        # NOTE(review): id_gts is whatever the last processed frame left
        # behind, and is unbound if no frame was processed at all -- confirm.
        acc_c.update(id_gts, [-1], np.ones(np.shape(id_gts)) * np.inf)
        acc_i.update(id_gts, [-1], np.ones(np.shape(id_gts)) * np.inf)
        acc_o.update(id_gts, [-1], np.ones(np.shape(id_gts)) * np.inf)

    tqdm.write("Computing...")
    # All count-based metrics are taken from the centroid accumulator.
    summary = mh.compute(
        acc_c,
        metrics=[
            "num_frames",
            "mota",
            "motp",
            "idf1",
            "mostly_tracked",
            "mostly_lost",
            "num_false_positives",
            "num_misses",
            "num_switches",
            "num_fragmentations",
        ],
        name="acc",
    )
    tqdm.write(f"summary = \n{summary}")
    # Avoid dividing by zero below when there are no ground-truth ids.
    num_tracks = len(ID_gt_all)
    if num_tracks == 0:
        num_tracks = 1

    num_frames = summary["num_frames"][0]
    mota = summary["mota"][0] * 100
    motp_c = summary["motp"][0]
    idf1 = summary["idf1"][0]
    most_track = summary["mostly_tracked"][0] / num_tracks
    most_lost = summary["mostly_lost"][0] / num_tracks
    num_fp = summary["num_false_positives"][0]
    num_miss = summary["num_misses"][0]
    num_switch = summary["num_switches"][0]
    num_frag = summary["num_fragmentations"][0]

    #acc_c.events.loc[acc_c.events.Type != "RAW",
    #                 "D"] = acc_i.events.loc[acc_c.events.Type != "RAW", "D"]

    # MOTP from the IoU accumulator.
    sum_motp_i = mh.compute(acc_i, metrics=["motp"], name="acc")
    tqdm.write(f"MOTP-I = \n{sum_motp_i}")

    motp_i = sum_motp_i["motp"][0]

    # acc_c.events.loc[acc_c.events.Type != "RAW",
    #                 "D"] = acc_o.events.loc[acc_c.events.Type != "RAW", "D"]
    # MOTP from the orientation accumulator.
    sum_motp_o = mh.compute(acc_o, metrics=["motp"], name="acc")
    tqdm.write(f"MOTP-O = \n{sum_motp_o}")

    motp_o = sum_motp_o["motp"][0]

    # Column order: frames, MOTA, MOTP (centroid, orientation, IoU), IDF1,
    # mostly tracked, mostly lost, FP, misses, switches, fragmentations.
    out_string = (f"{num_frames} {mota:.2f} "
                  f"{motp_c:.2f} {motp_o:.2f} {motp_i:.2f} "
                  f"{idf1:.2f} {most_track:.2f} {most_lost:.2f} "
                  f"{num_fp} {num_miss} {num_switch} {num_frag}\n")
    out_file.write(out_string)
Esempio n. 37
0
def main():
    """Run material inference for exemplar-shape pairs and save JSON results.

    Reads the module-level ``args`` (``checkpoint_path``, ``out_name``,
    ``category``, ``overwrite``). Directory layout is derived from the
    checkpoint path: metadata comes from ``<base>/lmdb/<snapshot>/meta.json``
    and results go to ``<base>/inference/<snapshot>/<model>/<out_name>``, one
    ``<pair id>.json`` file per pair. The user is asked to confirm the output
    directory before anything is written.
    """
    checkpoint_path = args.checkpoint_path
    base_dir = checkpoint_path.parent.parent.parent.parent
    snapshot_name = checkpoint_path.parent.parent.name
    lmdb_dir = (base_dir / 'lmdb' / snapshot_name)
    with (lmdb_dir / 'meta.json').open('r') as f:
        meta_dict = json.load(f)
        mat_id_to_label = meta_dict['mat_id_to_label']
        # Inverse mapping: classifier label -> material id.
        label_to_mat_id = {v: k for k, v in mat_id_to_label.items()}

    with (checkpoint_path.parent / 'model_params.json').open('r') as f:
        model_params = json.load(f)

    # The color binner is only built when the model parameters define a
    # color histogram space.
    color_binner = None
    if 'color_hist_space' in model_params:
        color_binner = ColorBinner(
            space=model_params['color_hist_space'],
            shape=tuple(model_params['color_hist_shape']),
            sigma=tuple(model_params['color_hist_sigma']),
        )

    print(f'Loading checkpoint from {checkpoint_path!s}')
    checkpoint = torch.load(checkpoint_path)

    if not args.out_name:
        # TODO: remove this ugly thing. (There's no reason to the +1 we did)
        out_name = str(checkpoint['epoch'] - 1)
    else:
        out_name = args.out_name

    model_name = checkpoint_path.parent.name
    out_dir = (base_dir / 'inference' / snapshot_name / model_name / out_name)

    model = RendNet3.from_checkpoint(checkpoint)
    model.train(False)
    model = model.cuda()

    yy = input(f'Will save to {out_dir!s}, continue? (y/n): ')
    if yy != 'y':
        return

    out_dir.mkdir(exist_ok=True, parents=True)

    filters = []
    if args.category:
        filters.append(ExemplarShapePair.shape.has(category=args.category))

    print('Loading pairs')
    with session_scope() as sess:
        pairs, _ = controllers.fetch_pairs_default(sess, filters=filters)
        materials = sess.query(models.Material).all()
        mat_by_id = {m.id: m for m in materials}

    # Unless overwriting, drop pairs that already have a result file so the
    # progress bar only counts outstanding work.
    pairs = [
        pair for pair in pairs
        if args.overwrite or not (Path(out_dir, f'{pair.id}.json').exists())
    ]

    pbar = tqdm(pairs)
    for pair in pbar:
        out_path = Path(out_dir, f'{pair.id}.json')
        # Re-checked per pair: the file may have appeared since the list was
        # filtered (presumably when several runs share out_dir -- TODO confirm).
        if not args.overwrite and out_path.exists():
            continue

        if not pair.data_exists(config.PAIR_SHAPE_CLEAN_SEGMENT_MAP_NAME):
            tqdm.write('clean segment map not exists')
            continue
        pbar.set_description(f'Pair {pair.id}')

        shape = (224, 224)
        exemplar_im = pair.exemplar.load_cropped_image()
        exemplar_im = skimage.transform.resize(exemplar_im,
                                               shape,
                                               anti_aliasing=True,
                                               order=3,
                                               mode='constant',
                                               cval=1)

        # Stored segment ids are shifted down by one; negative ids are
        # skipped further below.
        segment_map = pair.load_data(
            config.PAIR_SHAPE_CLEAN_SEGMENT_MAP_NAME) - 1
        substance_map = pair.exemplar.load_data(config.EXEMPLAR_SUBST_MAP_NAME)
        # order=0 resizing of the substance map to the segment map's shape.
        substance_map = resize(substance_map, segment_map.shape, order=0)

        vis.image(exemplar_im.transpose((2, 0, 1)), win='exemplar-image')

        result_dict = {'pair_id': pair.id, 'segments': {}}

        subst_id_by_seg_id = compute_segment_substances(
            pair,
            return_ids=True,
            segment_map=segment_map,
            substance_map=substance_map)

        for seg_id in [s for s in np.unique(segment_map) if s >= 0]:
            seg_mask = (segment_map == seg_id)
            topk_dict = compute_topk(
                label_to_mat_id,
                model,
                exemplar_im,
                seg_mask,
                minc_substance=SUBSTANCES[subst_id_by_seg_id[seg_id]],
                color_binner=color_binner,
                mat_by_id=mat_by_id)
            result_dict['segments'][str(seg_id)] = topk_dict

        with out_path.open('w') as f:
            json.dump(result_dict, f, indent=2)
Esempio n. 38
0
args = parser.parse_args()
# Reference
# https://towardsdatascience.com/the-easiest-way-to-download-youtube-videos-using-python-2640958318ab

file = open(args.urls, 'r')
lines = file.readlines()
done = []
totalLengthSeconds = 0
for i, line in enumerate(tqdm(lines, desc='Downloading', unit='video')):
    line = line.strip()
    sections = line.split("#")
    if len(sections) > 1:
        line = sections[0].strip()
    if len(line) <= 0:
        continue
    tqdm.write(line)
    if not line in done:
        name = "YouTube"
        while name == "YouTube":
            try:
                video = YouTube(line)
                name = video.title
                if name == "YouTube":
                    tqdm.write("Bad name")
                    continue
                tqdm.write('Video: "' + name + '"')
                if len(
                        video.streams.filter(file_extension="mp4").filter(
                            res=str(args.resolution) + 'p',
                            fps=args.fps)) != 1:
                    for s in video.streams.filter(
Esempio n. 39
0
 def emit(self, record):
     """Format *record* and print it through ``tqdm.write``."""
     tqdm.write(self.format(record))
            'labels': batch[2]
        }

        # Inputs to the model: https://huggingface.co/transformers/model_doc/bert.html#bertforsequenceclassification
        outputs = model(**inputs)  # Returns a tuple of loss and the logits
        loss = outputs[0]
        loss_train_total += loss.item()
        loss.backward()

        torch.nn.utils.clip_grad_norm(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()
        progress_bar.set_postfix(
            {'training_loss': '{}'.format(loss.item() / len(batch))})

    tqdm.write('\nEpoch {}'.format(epoch))

    loss_train_avg = loss_train_total / len(dataloader_train)

    tqdm.write('Training Loss: {}'.format(loss_train_avg))

    val_loss, predictions, true_vals = evaluate(dataloader_val)
    val_f1 = f1_score_func(predictions, true_vals)

    tqdm.write('Validation Loss: {}'.format(val_loss))
    tqdm.write('F1 score(weighted): {}'.format(val_f1))

    if val_f1 > best_f1:
        best_f1 = val_f1
        print('Saving the model...')
        torch.save(model.state_dict(), './Models/best_model.model')
Esempio n. 41
0
def download(directory, username, password, size, recent,
             until_found, download_videos, force_size, auto_delete,
             smtp_username, smtp_password, notification_email):
    """Download all iCloud photos to a local directory"""
    # The docstring above is left untouched in case it is surfaced as
    # command-line help text -- TODO confirm.
    #
    # Files are stored under <directory>/<YYYY>/<MM>/<DD>/ based on each
    # item's creation date. Options handled here:
    #   recent          only consider the first `recent` items
    #   until_found     stop after this many consecutive already-present files
    #   download_videos when false, anything not ending in .png/.jpg/.jpeg is
    #                   skipped
    #   auto_delete     afterwards, remove local copies of items found in the
    #                   'Recently Deleted' album
    # Each item is attempted up to MAX_RETRIES times on connection errors.

    if not notification_email:
        notification_email = smtp_username

    icloud = authenticate(username, password, smtp_username, smtp_password,
                          notification_email)

    directory = os.path.normpath(directory)

    print("Looking up all photos...")
    photos = icloud.photos.all
    photos_count = len(photos)

    # Optional: Only download the x most recent photos.
    if recent is not None:
        photos_count = recent
        photos = (p for i, p in enumerate(photos) if i < recent)

    kwargs = {'total': photos_count}

    if until_found is not None:
        # The number of items to process is unknown when stopping early.
        del kwargs['total']
        photos_count = '???'

        # ensure photos iterator doesn't have a known length
        photos = (p for p in photos)

    if download_videos:
        print("Downloading %s %s photos and videos to %s/ ..." %
              (photos_count, size, directory))
    else:
        print("Downloading %s %s photos to %s/ ..." %
              (photos_count, size, directory))

    consecutive_files_found = 0
    progress_bar = tqdm(photos, **kwargs)

    for photo in progress_bar:
        for _ in range(MAX_RETRIES):
            try:
                if not download_videos \
                    and not photo.filename.lower().endswith(('.png', '.jpg', '.jpeg')):

                    progress_bar.set_description(
                        "Skipping %s, only downloading photos." %
                        photo.filename)
                    # Leave the retry loop: `continue` here would repeat the
                    # skip MAX_RETRIES times and then fall into the for/else
                    # branch, wrongly reporting "Could not process ...".
                    break

                created_date = photo.created

                date_path = '{:%Y/%m/%d}'.format(created_date)
                download_dir = os.path.join(directory, date_path)

                if not os.path.exists(download_dir):
                    os.makedirs(download_dir)

                download_path = local_download_path(photo, size, download_dir)
                if os.path.isfile(download_path):
                    if until_found is not None:
                        consecutive_files_found += 1
                    progress_bar.set_description(
                        "%s already exists." %
                        truncate_middle(download_path, 96))
                    break

                download_photo(photo, download_path, size, force_size,
                               download_dir, progress_bar)
                # A fresh download interrupts the run of existing files.
                if until_found is not None:
                    consecutive_files_found = 0
                break

            except (requests.exceptions.ConnectionError, socket.timeout):
                tqdm.write('Connection failed, retrying after %d seconds...' %
                           WAIT_SECONDS)
                time.sleep(WAIT_SECONDS)

        else:
            # Only reached when every attempt failed without a `break`.
            tqdm.write("Could not process %s! Maybe try again later." %
                       photo.filename)

        if until_found is not None and consecutive_files_found >= until_found:
            tqdm.write(
                'Found %d consecutive previusly downloaded photos.  Exiting' %
                until_found)
            progress_bar.close()
            break

    print("All photos have been downloaded!")

    if auto_delete:
        print("Deleting any files found in 'Recently Deleted'...")

        recently_deleted = icloud.photos.albums['Recently Deleted']

        for media in recently_deleted:
            # Rebuild the local path the same way the download loop does.
            created_date = media.created
            date_path = '{:%Y/%m/%d}'.format(created_date)
            download_dir = os.path.join(directory, date_path)

            filename = filename_with_size(media, size)
            path = os.path.join(download_dir, filename)

            if os.path.exists(path):
                print("Deleting %s!" % path)
                os.remove(path)
Esempio n. 42
0
    def dataTesting(self, dataSourceA, dataSourceB):
        """Run the model over both data sources and print metrics for each.

        For each of ``dataSourceA[1]`` and ``dataSourceB[1]`` every sample is
        predicted once, starting at a random offset and wrapping around.
        Sensitivity, specificity, their mean ("score"), accuracy and elapsed
        time are then written with ``tqdm.write``.
        """
        data = newTesting(dataSourceA, dataSourceB)
        both_feature_names = ('both', 'Both', 'BOTH')
        for choice in (dataSourceA[1], dataSourceB[1]):
            num_data = data.DataNum[choice]  # number of samples of this type
            ran_num = random.randint(0, num_data - 1)  # random starting offset
            overall_p = overall_n = overall_tp = overall_tn = 0

            start = time.time()
            pbar = tqdm(range(num_data))
            for step in pbar:
                # Take samples consecutively from the random offset onwards.
                sample = data.GetData((ran_num + step) % num_data,
                                      dataType=choice,
                                      feature_type=self.featureType)
                if self.featureType in both_feature_names:
                    dataSpec, dataMfcc, data_labels = sample
                    data_input = [dataSpec, dataMfcc]
                else:
                    data_input, data_labels = sample
                    data_input = data_input[np.newaxis, :]
                data_pre = self.model.predict_on_batch(data_input)
                if self.voting == False:
                    predictions = np.argmax(data_pre[0], axis=0)
                else:
                    # Voting: positive when at least two outputs say so.
                    votes = sum(
                        np.argmax(element[0], axis=0) for element in data_pre)
                    predictions = 1 if votes >= 2 else 0
                # Compute the confusion counts for this sample.
                tp, fp, tn, fn = Comapare2(predictions, data_labels[0])
                overall_p += tp + fn
                overall_n += tn + fp
                overall_tp += tp
                overall_tn += tn

            # Rates are reported as the string 'None' when undefined.
            sensitivity = (round(overall_tp / overall_p * 100, 2)
                           if overall_p != 0 else 'None')
            specificity = (round(overall_tn / overall_n * 100, 2)
                           if overall_n != 0 else 'None')
            if sensitivity != 'None' and specificity != 'None':
                score = round((sensitivity + specificity) / 2, 2)
            else:
                score = 'None'
            accuracy = round(
                (overall_tp + overall_tn) / (overall_p + overall_n) * 100, 2)
            end = time.time()
            dtime = round(end - start, 2)

            template = '*[泛化性测试结果] 片段类型【{0}】 敏感度:{1}%, 特异度: {2}%, 得分: {3}, 准确度: {4}%, 用时: {5}s.'
            strg = template.format(choice, sensitivity, specificity, score,
                                   accuracy, dtime)
            tqdm.write(strg)
            pbar.close()
Esempio n. 43
0
 def write_above_single_progress_bar(self, seq_no, line):
     """Print *line* through ``tqdm.write``; *seq_no* is not used here."""
     tqdm.write(line)
Esempio n. 44
0
    def train(self):
        """Train for ``self._epoches`` epochs with dev evaluation after each one.

        Per epoch: shuffle the training data, run every batch through the
        session, evaluate on the shuffled dev set, print and log the results,
        and save a checkpoint to ``self._best_checkpoint_file`` whenever dev
        accuracy improves. On ``KeyboardInterrupt``/``SystemExit`` the current
        model is saved to ``self._checkpoint_file``; a ``ValueError`` is
        printed and training stops.
        """
        try:
            best_accuracy = 0
            epoch_log_file = os.path.join(self._result_log_base_path,
                                          "epoch_result.log")
            curr_learning = self._config["learning_rate"]
            minimum_learning_rate = self._config["minimum_learning_rate"]
            # Dev accuracy recorded at the previous learning-rate check.
            last_10_accuracy = 0.0
            for epoch in tqdm(range(self._epoches)):
                self._train_data_iterator.shuffle()
                losses = []
                total = 0
                train_correct = 0
                for _ in tqdm(range(
                        self._train_data_iterator.batch_per_epoch)):
                    batch = self._train_data_iterator.get_batch()
                    batch.learning_rate = curr_learning
                    # The model returns graph nodes plus the feed dict; running
                    # them in the session rebinds the names to concrete values.
                    tag_predictions, segment_length_predictions, loss, optimizer, feed_dict = self._train_model.train(
                        batch)
                    tag_predictions, segment_length_predictions, loss, optimizer = self._session.run(
                        (tag_predictions, segment_length_predictions, loss,
                         optimizer),
                        feed_dict=feed_dict)
                    total += batch.size
                    train_correct += self._check_predictions(
                        tag_predictions=tag_predictions,
                        segment_length_predictions=segment_length_predictions,
                        ground_truth=batch.ground_truth,
                        ground_truth_segment_length=batch.
                        ground_truth_segment_length,
                        ground_truth_segmentation_length=batch.
                        ground_truth_segmentation_length,
                        question_length=batch.questions_length)
                    losses.append(loss)
                train_acc = train_correct / total

                self._dev_data_iterator.shuffle()
                dev_accuracy = self.test(self._dev_data_iterator, is_log=False)

                average_loss = np.average(np.array(losses))
                tqdm.write(
                    "epoch: %d, loss: %f, train_acc: %f, dev_acc: %f, learning_rate: %f"
                    % (epoch, average_loss, train_acc, dev_accuracy,
                       curr_learning))

                if dev_accuracy > best_accuracy:
                    best_accuracy = dev_accuracy
                    self._saver.save(self._session, self._best_checkpoint_file)

                # Learning rate decay: every 20 epochs, halve the rate if dev
                # accuracy did not improve since the previous check and the
                # rate is still above the configured minimum.
                if epoch > 0 and epoch % 20 == 0:
                    if dev_accuracy <= last_10_accuracy and curr_learning > minimum_learning_rate:
                        curr_learning /= 2
                    last_10_accuracy = dev_accuracy

                self._epoch_log(file=epoch_log_file,
                                num_epoch=epoch,
                                train_accuracy=train_acc,
                                dev_accuracy=dev_accuracy,
                                average_loss=average_loss)

        except (KeyboardInterrupt, SystemExit):
            # If the user press Ctrl+C...
            # Save the model
            self._saver.save(self._session, self._checkpoint_file)
        except ValueError as e:
            print(e)
Esempio n. 45
0
def train(
    net,
    optimizer,
    criterion,
    data_loader,
    epoch,
    scheduler=None,
    display_iter=100,
    device=torch.device("cpu"),
    display=None,
    val_loader=None,
    supervision="full",
):
    """
    Training loop to optimize a network for several epochs and a specified loss

    Args:
        net: a PyTorch model
        optimizer: a PyTorch optimizer
        criterion: a PyTorch-compatible loss function, e.g. nn.CrossEntropyLoss.
        In 'semi' mode it is indexed as criterion[0] (classification term)
        and criterion[1] (reconstruction term).
        data_loader: a PyTorch dataset loader
        epoch: int specifying the number of training epochs
        scheduler (optional): PyTorch scheduler
        display_iter (optional): number of iterations before refreshing the
        display (False/None to switch off).
        device (optional): torch device to use (defaults to CPU)
        display (optional): plotting client exposing a
        line(X=, Y=, win=, update=, opts=) method (looks like a visdom
        client -- confirm). When None, progress is only written as text.
        val_loader (optional): validation dataset
        supervision (optional): 'full' or 'semi'

    Raises:
        Exception: if criterion is None.
        ValueError: if supervision is neither 'full' nor 'semi'.
    """

    if criterion is None:
        raise Exception("Missing criterion. You must specify a loss function.")

    net.to(device)

    # Checkpoint roughly 20 times over the whole run (every epoch for short runs)
    save_epoch = epoch // 20 if epoch > 20 else 1

    # One slot per optimisation step, plus slot 0 which stays unused because
    # iter_ is 1-based. Sizing the buffers from the real iteration count avoids
    # both a huge fixed allocation and an IndexError on very long runs.
    total_iters = epoch * len(data_loader)
    losses = np.zeros(total_iters + 1)
    mean_losses = np.zeros(total_iters + 1)
    iter_ = 1
    loss_win, val_win = None, None
    val_accuracies = []

    for e in tqdm(range(1, epoch + 1), desc="Training the network"):
        # Set the network to training mode
        net.train()
        avg_loss = 0.0

        # Run the training loop for one epoch
        for batch_idx, (data, target) in tqdm(enumerate(data_loader),
                                              total=len(data_loader)):
            # Load the data into the GPU if required
            data, target = data.to(device), target.to(device)

            optimizer.zero_grad()
            if supervision == "full":
                output = net(data)
                loss = criterion(output, target)
            elif supervision == "semi":
                # The network returns (prediction, reconstruction); the
                # reconstruction term is weighted by net.aux_loss_weight.
                outs = net(data)
                output, rec = outs
                loss = criterion[0](
                    output,
                    target) + net.aux_loss_weight * criterion[1](rec, data)
            else:
                raise ValueError(
                    'supervision mode "{}" is unknown.'.format(supervision))
            loss.backward()
            optimizer.step()

            avg_loss += loss.item()
            losses[iter_] = loss.item()
            # Running mean over (up to) the last 101 recorded slots
            mean_losses[iter_] = np.mean(losses[max(0, iter_ - 100):iter_ + 1])

            if display_iter and iter_ % display_iter == 0:
                string = "Train (epoch {}/{}) [{}/{} ({:.0f}%)]\tLoss: {:.6f}"
                string = string.format(
                    e,
                    epoch,
                    batch_idx * len(data),
                    len(data) * len(data_loader),
                    100.0 * batch_idx / len(data_loader),
                    mean_losses[iter_],
                )
                # Plotting is optional: without a display client only the
                # textual progress line is emitted.
                if display is not None:
                    update = None if loss_win is None else "append"
                    loss_win = display.line(
                        X=np.arange(iter_ - display_iter, iter_),
                        Y=mean_losses[iter_ - display_iter:iter_],
                        win=loss_win,
                        update=update,
                        opts={
                            "title": "Training loss",
                            "xlabel": "Iterations",
                            "ylabel": "Loss",
                        },
                    )
                tqdm.write(string)

                if display is not None and len(val_accuracies) > 0:
                    val_win = display.line(
                        Y=np.array(val_accuracies),
                        X=np.arange(len(val_accuracies)),
                        win=val_win,
                        opts={
                            "title": "Validation accuracy",
                            "xlabel": "Epochs",
                            "ylabel": "Accuracy",
                        },
                    )
            iter_ += 1
            # Drop references to the batch tensors before the next iteration
            del (data, target, loss, output)

        # Update the scheduler
        avg_loss /= len(data_loader)
        if val_loader is not None:
            val_acc = val(net,
                          val_loader,
                          device=device,
                          supervision=supervision)
            val_accuracies.append(val_acc)
            # Negated so that a higher accuracy reads as a lower metric
            metric = -val_acc
        else:
            metric = avg_loss

        # ReduceLROnPlateau needs the monitored value; other schedulers do not
        if isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau):
            scheduler.step(metric)
        elif scheduler is not None:
            scheduler.step()

        # Save the weights
        if e % save_epoch == 0:
            save_model(
                net,
                camel_to_snake(str(net.__class__.__name__)),
                data_loader.dataset.name,
                epoch=e,
                metric=abs(metric),
            )
Esempio n. 46
0
# Binary cross-entropy criterion used for every batch below
loss = torch.nn.BCELoss()

# Summed training loss of each epoch, plotted at the end of the script
loss_list = []

for e in tqdm(range(num_epochs)):
    # Accumulate a plain Python float: adding the loss tensor itself would
    # keep every batch's autograd graph alive for the whole epoch, and an
    # empty loader would leave a float that has no .item().
    total_loss = 0.0
    for batch in data_loader:
        optimizer.zero_grad()
        model_input = batch[0]
        # Reshape the labels to a single column before computing the loss
        label = batch[1].view(-1, 1)
        model_output = model(model_input)
        batch_loss = loss(model_output, label)
        batch_loss.backward()
        optimizer.step()
        total_loss += batch_loss.item()
    tqdm.write("loss: " + str(total_loss))
    loss_list.append(total_loss)

if args.save_model:
    from pathlib import Path
    print("Saving the trained model")
    this_filepath = Path(os.path.abspath(__file__))
    this_dirpath = this_filepath.parent

    # The model is stored in a "model" directory next to this script
    model_path = os.path.join(this_dirpath, "model")
    os.makedirs(model_path, exist_ok=True)
    model_path = os.path.join(model_path, "fnn.log")
    torch.save(model, model_path)

plt.plot(loss_list)
Esempio n. 47
0
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

# Document ids to fetch, one integer per line. Blank lines (for example a
# trailing newline at the end of the file) are skipped instead of crashing
# int().
with open("errors.txt", "r") as infile:
    to_process = [int(line) for line in infile if line.strip()]

for i in tqdm(to_process):  # latest document as of Feb 2 2018
    tqdm.write("Processing document #" + str(i))
    document = requests.get(
        "http://www.presidency.ucsb.edu/ws/print.php?pid=" + str(i)).text
    soup = BeautifulSoup(document, 'html.parser')

    # The page title becomes the file name: replace non-breaking spaces and
    # the path separator, which cannot appear in a file name.
    title = soup.title.contents[0].replace("\xa0", " ").replace("/", ":")
    if len(title) > 200:
        # Keep the head and the tail of over-long titles (200 chars in total)
        title = title[:97] + "..." + title[-100:]
    content = soup.find('span', {'class': 'style9'}).text

    # Write as UTF-8 explicitly so non-ASCII text does not depend on the
    # platform's default encoding.
    with open(title + "." + str(i) + ".txt", "w", encoding="utf-8") as outfile:
        outfile.write(content)
# NOTE: any failure (network error, missing title or content span) aborts the
# whole run. A previous version caught the exception, printed it and appended
# the failing id to "errors2.txt" before continuing; that handler is disabled.
Esempio n. 48
0
    def train_loop(self, epoch, train_loader, optimizer, args):
        """
        Train for one pass over ``train_loader`` and report averaged metrics.

        :param epoch: the epoch # (only used in the log lines)
        :param train_loader: a torch.utils.data.DataLoader generated from
            data.datamgr.SetDataManager; each item unpacks as
            ``(x, target, (lang, lang_length, lang_mask))``
        :param optimizer: a torch.optim.Optimzer
        :param args: other args passed to the script (``args.lang_lambda``
            scales the language loss)

        :returns: a dictionary with keys train_acc, train_loss, cls_loss and
            lang_loss; lang_loss stays None unless ``self.lsl`` is set
        """
        loss_sum = 0
        cls_loss_sum = 0
        lang_loss_sum = 0
        accuracies = []

        for step, batch in enumerate(train_loader):
            x, _target, (lang, lang_length, lang_mask) = batch
            self.n_query = x.size(1) - self.n_support

            optimizer.zero_grad()

            with_language = self.lsl or self.l3
            if with_language:
                # Trim padding to the longest description in the batch, then
                # move the language tensors to the GPU
                longest = lang_length.max()
                lang = lang[:, :, :longest].cuda()
                lang_mask = lang_mask[:, :, :longest].cuda()
                lang_length = lang_length.cuda()

            # ---- classification loss ----
            if self.l3:
                forward_out = self.set_forward_loss_l3(
                    x, (lang, lang_length), return_z=True)
            else:
                forward_out = self.set_forward_loss(x, return_z=True)
            cls_loss, z_support, z_query = forward_out
            loss = cls_loss

            # ---- language loss ----
            if with_language:
                lang_loss = args.lang_lambda * self.set_lang_loss(
                    z_support, z_query, lang, lang_length, lang_mask)
                loss = loss + lang_loss
                lang_loss_sum += lang_loss.item()

            loss.backward()
            optimizer.step()
            loss_sum += loss.item()
            cls_loss_sum += cls_loss.item()

            # A single inference is used for L3 at train time because
            # evaluating accuracy is expensive
            if self.l3:
                n_correct, n_total = self.correct_l3(x, n_infer=1)
            else:
                n_correct, n_total = self.correct(x)
            accuracies.append(n_correct / n_total * 100)

        n_batches = step + 1
        train_loss = loss_sum / n_batches
        cls_loss_avg = cls_loss_sum / n_batches
        tqdm.write("Epoch {:d} | Loss {:f}".format(epoch, train_loss))

        lang_loss_avg = None
        if self.lsl:
            lang_loss_avg = lang_loss_sum / n_batches
            tqdm.write("Epoch {:d} | Lang Loss {:f}".format(
                epoch, lang_loss_avg))

        train_acc = np.mean(accuracies)
        tqdm.write("Epoch {:d} | Train Acc {:.2f}".format(epoch, train_acc))

        return {
            "train_acc": train_acc,
            "train_loss": train_loss,
            "cls_loss": cls_loss_avg,
            "lang_loss": lang_loss_avg,
        }
Esempio n. 49
0
 def check_alignments(self, frame):
     """ Report whether alignments exist for ``frame``.

     Writes a "skipping" notice through tqdm when none are found and
     returns whatever ``self.alignments.frame_exists`` returned.
     """
     found = self.alignments.frame_exists(frame)
     if found:
         return found
     tqdm.write("No alignment found for {}, skipping".format(frame))
     return found
# Target column; X is expected to have been prepared earlier in the script
Y = data['fraud_ind']
# Hold out 20% of the rows; the fixed seed makes the split reproducible
X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                    Y,
                                                    test_size=0.2,
                                                    random_state=0)
print(
    f'X_train.shape={X_train.shape}, X_test.shape={X_test.shape}, Y_train.shape={Y_train.shape}, Y_test.shape={Y_test.shape}'
)

# (short name, estimator) pairs compared by cross-validated F1 below.
# NOTE(review): LocalOutlierFactor() with default settings does not appear to
# expose predict(), so its 'f1' scores are probably NaN or an error -- confirm
# whether it belongs in this comparison.
models = [
    ('LR', LogisticRegression()),
    ('CART', DecisionTreeClassifier()),
    ('SVM', LinearSVC()),
    ('RF', RandomForestClassifier()),
    ('LOF', LocalOutlierFactor()),
]

results = []
names = []

# The same 10 contiguous, unshuffled folds are used for every model.
# random_state is deliberately not passed: it only has an effect together with
# shuffle=True, and recent scikit-learn raises ValueError when it is set on a
# non-shuffling KFold.
kfold = KFold(n_splits=10)

for name, model in tqdm(models):
    tqdm.write(f'Training {name}...')
    cv_f1_scores = cross_val_score(model,
                                   X_train,
                                   Y_train,
                                   cv=kfold,
                                   scoring='f1')
    results.append(cv_f1_scores)
    names.append(name)
    tqdm.write(f'{name}: {cv_f1_scores.mean()} ({cv_f1_scores.std()})')
Esempio n. 51
0
    def __init__(self, cnn, archive, name, org, subnet, date, GamitConfig, stations, ties=(), centroid=()):
        """
        The GAMIT session object creates all the directory structure and configuration files according to the parameters
        set in GamitConfig. Two stations list are passed and merged to create the session
        :param cnn: connection to database object
        :param archive: archive object to find rinex files in archive structure
        :param name: name of the project/network
        :param org: name of the organization
        :param subnet: subnet number (may be None, in which case the directory name will not show ORGXX
        :param date: date that is being processed
        :param GamitConfig: configuration to run gamit
        :param stations: list of stations to be processed
        :param ties: tie stations as obtained by pyNetwork
        :param centroid: iterable of numbers stored (one decimal each) in the 'centroid' column of gamit_subnets
        """
        self.NetName = name
        self.org     = org
        self.subnet  = subnet

        if subnet is not None:
            self.DirName = '%s.%s%02i' % (self.NetName, self.org, self.subnet)
        else:
            self.DirName = self.NetName

        self.date           = date
        self.GamitOpts      = GamitConfig.gamitopt  # type: pyGamitConfig.GamitConfiguration().gamitopt
        self.Config         = GamitConfig           # type: pyGamitConfig.GamitConfiguration
        self.frame          = None
        self.params         = None
        # to store the polyhedron read from the final SINEX
        self.polyhedron     = None
        self.VarianceFactor = None
        # gamit task will be filled with the GamitTask object
        self.GamitTask      = None

        self.solution_base = self.GamitOpts['solutions_dir'].rstrip('/')

        # tie station dictionary (to build KMLs, do not change)
        self.tie_dict = [{'name'  : stationID(stn),
                          'coords': [(stn.lon, stn.lat)]}
                         for stn in ties]

        # station dictionary (to build KMLs, do not change)
        self.stations_dict = [{'name'   : stationID(stn),
                               'coords' : [(stn.lon, stn.lat)]}
                              for stn in stations]

        # make StationInstances; stations with a malformed RINEX name are reported and left out
        station_instances = []
        for stn in stations:
            try:
                station_instances.append(StationInstance(cnn, archive, stn, date, GamitConfig))
            except pyRinexName.RinexNameException:
                tqdm.write(' -- WARNING (station instance): station %s on day %s appears to have a badly formed RINEX '
                           'filename. Please check the archive and make sure all filenames follow the RINEX 2/3 '
                           'convention. Station has been excluded from the GAMIT session.'
                           % (stationID(stn), date.yyyyddd()))

        # do the same with ties
        for stn in ties:
            try:
                station_instances.append(StationInstance(cnn, archive, stn, date, GamitConfig, is_tie=True))
            except pyRinexName.RinexNameException:
                tqdm.write(' -- WARNING (tie instance): station %s on day %s appears to have a badly formed RINEX '
                           'filename. Please check the archive and make sure all filenames follow the RINEX 2/3 '
                           'convention. Station has been excluded from the GAMIT session.'
                           % (stationID(stn), date.yyyyddd()))

        self.StationInstances = station_instances

        # create working dirs for this session
        last_path = '/%s/%s/%s' % (date.yyyy(), date.ddd(), self.DirName)
        self.solution_pwd = self.solution_base + last_path
        # the remote pwd is the directory where the processing will be performed
        self.remote_pwd   = 'production/gamit' + last_path

        row_key = {'Year'    : date.year,
                   'DOY'     : date.doy,
                   'Project' : self.NetName,
                   'subnet'  : 0 if subnet is None else subnet}

        # NOTE: the handlers below catch Exception rather than using a bare except so that
        # Ctrl+C (KeyboardInterrupt) and SystemExit are not silently swallowed.
        try:
            # attempt to retrieve the session from the database. If error is raised, then the session has to be
            # reprocessed
            cnn.get('gamit_stats', row_key.copy())
            self.ready = True
        except Exception:
            self.ready = False

            try:
                # since ready == False, then try to delete record in subnets
                cnn.delete('gamit_subnets', row_key.copy())
            except Exception:
                # best effort: the record may simply not exist
                pass

        # a list to report missing data for this session
        self.missing_data = []

        if not os.path.exists(self.solution_pwd):
            # if the path does not exist, create it!
            os.makedirs(self.solution_pwd)
            # force ready = False, no matter what the database says
            self.ready = False
            try:
                cnn.delete('gamit_stats',   row_key.copy())
                cnn.delete('gamit_subnets', row_key.copy())
            except Exception:
                # best effort: the records may simply not exist
                pass

        elif not self.ready:
            # if the solution directory exists but the session is not ready, kill the directory
            rmtree(self.solution_pwd)

        if not self.ready:
            # accept any sequence type for stations / ties when merging them
            all_stations = list(stations) + list(ties)
            # insert the subnet in the database
            cnn.insert('gamit_subnets', {**row_key,
                                         'stations' : '{%s}' % ','.join(stationID(s)   for s in all_stations),
                                         'alias'    : '{%s}' % ','.join(s.StationAlias for s in all_stations),
                                         'ties'     : '{%s}' % ','.join(s['name']      for s in self.tie_dict),
                                         'centroid' : '{%s}' % ','.join('%.1f' % c     for c in centroid)})

        self.pwd_igs    = os.path.join(self.solution_pwd, 'igs')
        self.pwd_brdc   = os.path.join(self.solution_pwd, 'brdc')
        self.pwd_rinex  = os.path.join(self.solution_pwd, 'rinex')
        self.pwd_tables = os.path.join(self.solution_pwd, 'tables')
        self.pwd_glbf   = os.path.join(self.solution_pwd, 'glbf')
        self.pwd_proc   = os.path.join(self.solution_pwd, date.ddd())

        if not self.ready:
            # only create folders, etc if it was determined the solution isn't ready
            os.makedirs(self.pwd_igs, exist_ok=True)
            os.makedirs(self.pwd_brdc, exist_ok=True)

            if os.path.exists(self.pwd_rinex):
                # delete any possible rinex files from a truncated session
                rmtree(self.pwd_rinex)
            os.makedirs(self.pwd_rinex)

            os.makedirs(self.pwd_tables, exist_ok=True)

            # check that the processing directories don't exist.
            # if they do, remove them (it has already been determined that the solution is not ready)
            if os.path.exists(self.pwd_glbf):
                rmtree(self.pwd_glbf)

            if os.path.exists(self.pwd_proc):
                rmtree(self.pwd_proc)

            self.generate_kml()
Esempio n. 52
0
def main(data_path, results_file, config):
    ####################################################################################
    # Previous operations
    ####################################################################################
    ###    layers = config['layers']
    ###    L = len(layers)

    conv_kernels = config['conv_kernels']
    conv_filters = config['conv_filters']
    num_classes = config['num_classes']

    tf.reset_default_graph(
    )  # Clear the tensorflow graph (free reserved memory)

    ####################################################################################
    # Inputs setup
    ####################################################################################
    max_sentence_len = config['max_sentence_len']

    # feedforward_inputs (FFI): inputs for the feedforward network (i.e. the encoder).
    # Should contain the labeled training data (padded to max_sentence_len).
    feedforward_inputs = tf.placeholder(tf.int32,
                                        shape=(None, max_sentence_len),
                                        name="FFI")

    # autoencoder_inputs (AEI): inputs for the autoencoder (encoder + decoder).
    # Should contain the unlabeled training data (also padded to max_sentence_len).
    autoencoder_inputs = tf.placeholder(tf.int32,
                                        shape=(None, max_sentence_len),
                                        name="AEI")

    outputs = tf.placeholder(tf.float32)  # target
    training = tf.placeholder(tf.bool)  # training or evaluation

    # Not quite sure what is this for
    FFI = tf.reshape(feedforward_inputs, [-1] + [max_sentence_len])
    AEI = tf.reshape(autoencoder_inputs, [-1] + [max_sentence_len])

    ####################################################################################
    # Embeddings weights
    ####################################################################################

    embeddings_size = config['embeddings_size']
    vocab_size = config['vocab_size']
    embeddings_weights = tf.get_variable("embeddings",
                                         (vocab_size, embeddings_size),
                                         trainable=False)
    # initializer=tf.random_normal_initializer())

    place = tf.placeholder(tf.float32, shape=(vocab_size, embeddings_size))
    set_embeddings_weights = embeddings_weights.assign(place)

    FFI_embeddings = tf.expand_dims(tf.nn.embedding_lookup(
        embeddings_weights, FFI),
                                    axis=-1,
                                    name="FFI_embeddings")

    AEI_embeddings = tf.expand_dims(tf.nn.embedding_lookup(
        embeddings_weights, AEI),
                                    axis=-1,
                                    name="AEI_embeddings")

    ####################################################################################
    # Batch normalization setup & functions
    ####################################################################################
    # to calculate the moving averages of mean and variance
    # ewma = tf.train.ExponentialMovingAverage(decay=0.99)
    # # this list stores the updates to be made to average mean and variance
    # bn_assigns = []

    # def update_batch_normalization(batch, output_name="bn", scope_name="BN"):
    #     dim = len(batch.get_shape().as_list())
    #     mean, var = tf.nn.moments(batch, axes=list(range(0, dim - 1)))
    #     # Function to be used during the learning phase.
    #     # Normalize the batch and update running mean and variance.
    #     with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE):
    #         running_mean = tf.get_variable("running_mean",
    #                                        mean.shape,
    #                                        initializer=tf.constant_initializer(0))
    #         running_var = tf.get_variable("running_var",
    #                                       mean.shape,
    #                                       initializer=tf.constant_initializer(1))

    #     assign_mean = running_mean.assign(mean)
    #     assign_var = running_var.assign(var)
    #     bn_assigns.append(ewma.apply([running_mean, running_var]))

    #     with tf.control_dependencies([assign_mean, assign_var]):
    #         z = (batch - mean) / tf.sqrt(var + 1e-10)
    #         return tf.identity(z, name=output_name)

    def batch_normalization(batch, output_name="bn"):

        dim = len(batch.get_shape().as_list())
        mean, var = tf.nn.moments(batch, axes=list(range(0, dim - 1)))
        # if mean is None or var is None:
        #     dim = len(batch.get_shape().as_list())
        #     mean, var = tf.nn.moments(batch, axes=list(range(0, dim - 1)))
        z = (batch - mean) / tf.sqrt(var + tf.constant(1e-10))
        return tf.identity(z, name=output_name)

    ####################################################################################
    # Encoder
    ####################################################################################
    def encoder_layer(z_pre, noise_std, activation):
        # Run the layer
        # z_pre = run_layer(h, layer_spec, output_name="z_pre")

        # Compute mean and variance of z_pre (to be used in the decoder)
        dim = len(z_pre.get_shape().as_list())
        mean, var = tf.nn.moments(z_pre, axes=list(range(0, dim - 1)))
        # Create a variable to store the values for latter retrieving them
        _ = tf.identity(mean, name="mean"), tf.identity(var, name="var")

        # # Batch normalization
        # def training_batch_norm():
        #     if update_BN:
        #         z = update_batch_normalization(z_pre)
        #     else:
        #         z = batch_normalization(z_pre)
        #     return z

        # def eval_batch_norm():
        #     with tf.variable_scope("BN", reuse=tf.AUTO_REUSE):
        #         mean = ewma.average(tf.get_variable("running_mean",
        #                                             shape=z_pre.shape[-1]))
        #         var = ewma.average(tf.get_variable("running_var",
        #                                            shape=z_pre.shape[-1]))
        #     z = batch_normalization(z_pre, mean, var)
        #     return z

        # Perform batch norm depending to the phase (training or testing)
        # z = tf.cond(training, training_batch_norm, eval_batch_norm)
        z = batch_normalization(z_pre)
        z += tf.random_normal(tf.shape(z)) * noise_std
        z = tf.identity(z, name="z")

        # Center and scale plus activation
        size = z.get_shape().as_list()[-1]
        beta = tf.get_variable("beta", [size],
                               initializer=tf.constant_initializer(0))
        gamma = tf.get_variable("gamma", [size],
                                initializer=tf.constant_initializer(1))

        h = activation(z * gamma + beta)
        return tf.identity(h, name="h")

    def encoder(x, noise_std):
        # Perform encoding for each layer
        x += tf.random_normal(tf.shape(x)) * noise_std
        x = tf.identity(x, "h0")

        # Build the "wide" convolutional layer for each conv_kernel
        # This is the "first" layer
        conv_features = []
        weight_variables = []
        for i, ksize in enumerate(conv_kernels, start=1):
            with tf.variable_scope("encoder_bloc_" + str(i),
                                   reuse=tf.AUTO_REUSE):
                W = tf.get_variable(
                    "W", (ksize, embeddings_size, 1, conv_filters),
                    initializer=tf.truncated_normal_initializer())
                weight_variables.append(W)
                z_pre = tf.nn.conv2d(x,
                                     W,
                                     strides=[1, 1, 1, 1],
                                     padding="VALID",
                                     name="z_pre")
                h = encoder_layer(
                    z_pre,
                    noise_std,  # update_BN=update_BN,
                    activation=tf.nn.relu)
                h = tf.nn.max_pool(
                    h,
                    ksize=[1, max_sentence_len - ksize + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="global_max_pool")
                conv_features.append(h)

        # Build the features layer ("second" layer)
        total_kernels = len(conv_kernels)
        total_conv_features = total_kernels * conv_filters
        with tf.variable_scope("encoder_bloc_" + str(total_kernels + 1),
                               reuse=tf.AUTO_REUSE):
            h = tf.concat(conv_features, 3)
            h = tf.reshape(h, (-1, total_conv_features), name="h")

        # Build the features to classes layer ("last" layer)
        with tf.variable_scope("encoder_bloc_" + str(total_kernels + 2),
                               reuse=tf.AUTO_REUSE):
            W = tf.get_variable("W", (total_conv_features, num_classes),
                                initializer=tf.random_normal_initializer())
            weight_variables.append(W)

            print('h shape', h.shape)
            print('W shape', W.shape)

            z_pre = tf.matmul(h, W, name="z_pre")
            h = encoder_layer(
                z_pre,
                noise_std,  # update_BN=update_BN,
                activation=tf.nn.softmax)

        y = tf.identity(h, name="y")
        return y, weight_variables

    noise_std = config['noise_std']

    with tf.name_scope("FF_clean"):
        # output of the clean encoder. Used for prediction
        FF_y, weight_variables = encoder(FFI_embeddings,
                                         0)  # , update_BN=False)
    with tf.name_scope("FF_corrupted"):
        # output of the corrupted encoder. Used for training.
        FF_y_corr, _ = encoder(FFI_embeddings, noise_std)  # , update_BN=False)

    with tf.name_scope("AE_clean"):
        # corrupted encoding of unlabeled instances
        AE_y, _ = encoder(AEI_embeddings, 0)  # , update_BN=True)
    with tf.name_scope("AE_corrupted"):
        # corrupted encoding of unlabeled instances
        AE_y_corr, _ = encoder(AEI_embeddings, noise_std)  # , update_BN=False)

    l2_reg = tf.constant(0.0)
    for we_var in weight_variables:
        l2_reg += tf.nn.l2_loss(we_var)

    ####################################################################################
    # Decoder
    ####################################################################################

    def g_gauss(z_c, u, output_name="z_est", scope_name="denoising_func"):
        # gaussian denoising function proposed in the original paper
        size = u.get_shape().as_list()[-1]

        def wi(inits, name):
            return tf.Variable(inits * tf.ones([size]), name=name)

        with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE):
            a1 = wi(0., 'a1')
            a2 = wi(1., 'a2')
            a3 = wi(0., 'a3')
            a4 = wi(0., 'a4')
            a5 = wi(0., 'a5')

            a6 = wi(0., 'a6')
            a7 = wi(1., 'a7')
            a8 = wi(0., 'a8')
            a9 = wi(0., 'a9')
            a10 = wi(0., 'a10')

            mu = a1 * tf.sigmoid(a2 * u + a3) + a4 * u + a5
            v = a6 * tf.sigmoid(a7 * u + a8) + a9 * u + a10

            z_est = (z_c - mu) * v + mu
        return tf.identity(z_est, name=output_name)

    def get_tensor(input_name, num_encoder_bloc, name_tensor):
        return tf.get_default_graph().\
            get_tensor_by_name(input_name + "/encoder_bloc_" +
                               str(num_encoder_bloc) + "/" + name_tensor + ":0")

    denoising_cost = config['denoising_cost']
    d_cost = []
    u = batch_normalization(AE_y_corr, output_name="u_L")

    # Build first decoder layer (corresponding to the dense layer)
    total_kernels = len(conv_kernels)
    total_conv_features = total_kernels * conv_filters
    with tf.variable_scope("decoder_bloc_" + str(total_kernels + 2),
                           reuse=tf.AUTO_REUSE):
        z_corr = get_tensor("AE_corrupted", total_kernels + 2, "z")
        z = get_tensor("AE_clean", total_kernels + 2, "z")
        mean = get_tensor("AE_clean", total_kernels + 2, "mean")
        var = get_tensor("AE_clean", total_kernels + 2, "var")
        # Performs the decoding operations of a corresponding encoder bloc
        # Denoising
        z_est = g_gauss(z_corr, u)

        z_est_BN = (z_est - mean) / tf.sqrt(var + tf.constant(1e-10))
        z_est_BN = tf.identity(z_est_BN, name="z_est_BN")

        # run decoder layer
        V = tf.get_variable("V", (num_classes, total_conv_features),
                            initializer=tf.random_normal_initializer())
        l2_reg += tf.nn.l2_loss(V)
        u = tf.matmul(z_est, V)
        u = batch_normalization(u, output_name="u")

        d_cost.append(
            (tf.reduce_mean(tf.square(z_est_BN - z))) * denoising_cost[2])

    # Build second decoder layer (corresponding to the concatenation+flat layer)
    with tf.variable_scope("decoder_bloc_" + str(total_kernels + 1),
                           reuse=tf.AUTO_REUSE):
        u = tf.reshape(u, (-1, 1, 1, total_conv_features))
        deconv_features = tf.split(u, total_kernels, axis=3)

    # Build the final "wide convolutional" layer
    deconv_layers = []
    for i, gmp_layer in enumerate(deconv_features, start=1):
        ksize = conv_kernels[i - 1]
        with tf.variable_scope("decoder_bloc_" + str(i), reuse=tf.AUTO_REUSE):
            u = tf.keras.layers.UpSampling2D(size=(max_sentence_len - ksize +
                                                   1, 1))(gmp_layer)

            z_corr = get_tensor("AE_corrupted", i, "z")
            z = get_tensor("AE_clean", i, "z")
            mean = get_tensor("AE_clean", i, "mean")
            var = get_tensor("AE_clean", i, "var")
            z_est = g_gauss(z_corr, u)

            z_est_BN = (z_est - mean) / tf.sqrt(var + tf.constant(1e-10))
            z_est_BN = tf.identity(z_est_BN, name="z_est_BN")

            # run deconvolutional (transposed convolution) layer
            V = tf.get_variable("V", (ksize, embeddings_size, 1, conv_filters),
                                initializer=tf.truncated_normal_initializer())
            l2_reg += tf.nn.l2_loss(V)

            u = tf.nn.conv2d_transpose(z_est,
                                       V,
                                       output_shape=tf.shape(AEI_embeddings),
                                       strides=[1, 1, 1, 1],
                                       padding='VALID')
            u = batch_normalization(u, output_name="u")
            deconv_layers.append(u)
            d_cost.append(
                (tf.reduce_mean(tf.square(z_est_BN - z))) * denoising_cost[1])

    # last decoding step
    u = tf.concat(deconv_layers, 2)
    with tf.variable_scope("decoder_bloc_0", reuse=tf.AUTO_REUSE):
        z_corr = tf.get_default_graph().get_tensor_by_name("AE_corrupted/h0:0")
        z_corr = tf.concat([z_corr] * total_kernels, 2)
        z = tf.get_default_graph().get_tensor_by_name("AE_clean/h0:0")
        z = tf.concat([z] * total_kernels, 2)
        z_est = g_gauss(z_corr, u)
        d_cost.append(
            (tf.reduce_mean(tf.square(z_est - z))) * denoising_cost[0])

    ####################################################################################
    # Loss, accuracy and optimization
    ####################################################################################

    u_cost = tf.add_n(d_cost)  # reconstruction cost
    corr_pred_cost = -tf.reduce_mean(
        tf.reduce_sum(outputs * tf.log(FF_y_corr), 1))  # supervised cost
    clean_pred_cost = -tf.reduce_mean(tf.reduce_sum(outputs * tf.log(FF_y), 1))

    loss = corr_pred_cost + u_cost * config['u_cost_weight'] + config.get(
        "lambda", 0.0) * l2_reg  # total cost

    predictions = tf.argmax(FF_y, 1)
    correct_prediction = tf.equal(predictions, tf.argmax(outputs, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    # Optimization setting
    starter_learning_rate = config['starter_learning_rate']
    learning_rate = tf.Variable(starter_learning_rate, trainable=False)
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    # add the updates of batch normalization statistics to train_step
    # bn_updates = tf.group(*bn_assigns)
    # with tf.control_dependencies([train_step]):
    #     train_step = tf.group(bn_updates)

    n = np.sum(
        [np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()])
    print("There is a total of %d trainable parameters" % n, file=sys.stderr)

    ####################################################################################
    # Training
    ####################################################################################
    print("===  Loading Data ===", file=sys.stderr)
    data, w2v_model = input_data_cnn_wide_ladder.read_data_sets(
        data_path,
        n_classes=config['num_classes'],
        n_labeled=config['num_labeled'],
        maxlen=max_sentence_len)
    num_examples = data.train.unlabeled_ds.instances.shape[0]

    batch_size = config['batch_size']
    num_epochs = config['num_epochs']

    num_iter = (num_examples //
                batch_size) * num_epochs  # number of loop iterations

    print("===  Starting Session ===", file=sys.stderr)
    dev_config = tf.ConfigProto()
    # Don't pre-allocate memory; allocate as-needed
    dev_config.gpu_options.allow_growth = True
    # Only allow a total of half the GPU memory to be allocated
    dev_config.gpu_options.per_process_gpu_memory_fraction = 1  #0.5
    sess = tf.Session(config=dev_config)

    if not os.path.exists(results_file):
        results_log = open(results_file, "w")
        print("experiment,split,epoch,accuracy,tloss,lloss,true,pred",
              file=results_log)

    else:
        results_log = open(results_file, "a")

    init = tf.global_variables_initializer()
    sess.run(init)

    print('=== Initializing embeddings with pre-trained weights ===')
    sess.run(set_embeddings_weights, feed_dict={place:
                                                w2v_model.syn0})  #.vectors})

    print("=== Training Start ===", file=sys.stderr)
    tr = trange(0, num_iter, desc="iter: nan - loss: nan")
    for i in tr:
        labeled_instances, labels, unlabeled_instances = data.train.next_batch(
            batch_size)

        _, tloss, lloss = sess.run(
            [train_step, loss, clean_pred_cost],
            feed_dict={
                feedforward_inputs: labeled_instances,
                outputs: labels,
                autoencoder_inputs: unlabeled_instances,
                training: True
            })
        tr.set_description("loss: %.5g - lloss: %.5g" % (tloss, lloss))

        if (i > 1) and ((i + 1) %
                        (num_iter / num_epochs) == 0) and i < num_iter - 1:
            # Compute train and validation stats for each epoch
            epoch_n = i // (num_examples // batch_size) + 1

            tqdm.write("=== Epoch %d stats ===" % epoch_n, file=sys.stderr)
            # For training data we traverse in batches and save all the information
            training_instances = data.train.labeled_ds.instances
            training_labels = data.train.labeled_ds.labels
            mean_accuracy = []
            mean_loss = []

            for start in trange(0, len(training_labels), batch_size):
                end = min(start + batch_size, len(training_labels))
                epoch_stats = sess.run(
                    [accuracy, loss, clean_pred_cost, predictions],
                    feed_dict={
                        feedforward_inputs: training_instances[start:end],
                        outputs: training_labels[start:end],
                        autoencoder_inputs: unlabeled_instances,
                        training: False
                    })

                mean_accuracy.append(epoch_stats[0])
                mean_loss.append(epoch_stats[2])

                true_labels = np.argmax(training_labels[start:end], 1)
                for i in np.arange(true_labels.shape[0]):
                    print("%s,training,%d,%.3g,%.3g,%.3g,%d,%d" %
                          (config["experiment_id"], epoch_n, epoch_stats[0],
                           epoch_stats[1], epoch_stats[2], true_labels[i],
                           epoch_stats[3][i]),
                          file=results_log)

            tqdm.write("Epoch %d: Accuracy for Training Data: %.3g" %
                       (epoch_n, np.mean(mean_accuracy)),
                       file=sys.stderr)
            tqdm.write("Epoch %d: Supervised Cost for Training Data: %.3g" %
                       (epoch_n, np.mean(mean_loss)),
                       file=sys.stderr)

            # For validation data we traverse in batches and save all the information
            validation_instances = data.validation.instances
            validation_labels = data.validation.labels
            mean_accuracy = []
            mean_loss = []

            for start in trange(0, len(validation_labels), batch_size):
                end = min(start + batch_size, len(validation_labels))
                epoch_stats = sess.run(
                    [accuracy, loss, clean_pred_cost, predictions],
                    feed_dict={
                        feedforward_inputs: validation_instances[start:end],
                        outputs: validation_labels[start:end],
                        autoencoder_inputs: unlabeled_instances,
                        training: False
                    })

                mean_accuracy.append(epoch_stats[0])
                mean_loss.append(epoch_stats[2])

                true_labels = np.argmax(validation_labels[start:end], 1)
                for i in np.arange(true_labels.shape[0]):
                    print("%s,validation,%d,%.3g,%.3g,%.3g,%d,%d" %
                          (config["experiment_id"], epoch_n, epoch_stats[0],
                           epoch_stats[1], epoch_stats[2], true_labels[i],
                           epoch_stats[3][i]),
                          file=results_log)

            tqdm.write("Epoch %d: Accuracy for Validation Data: %.3g" %
                       (epoch_n, np.mean(mean_accuracy)),
                       file=sys.stderr)
            tqdm.write("Epoch %d: Supervised Cost for Validation Data: %.3g" %
                       (epoch_n, np.mean(mean_loss)),
                       file=sys.stderr)

            results_log.flush()

            decay_after = config['decay_after']
            if (epoch_n + 1) >= decay_after:
                # decay learning rate
                # learning_rate = starter_learning_rate * ((num_epochs - epoch_n) / (num_epochs - decay_after))
                ratio = 1.0 * (
                    num_epochs - (epoch_n + 1)
                )  # epoch_n + 1 because learning rate is set for next epoch
                ratio = max(0, ratio / (num_epochs - decay_after))
                sess.run(learning_rate.assign(starter_learning_rate * ratio))

    print("=== Final stats ===", file=sys.stderr)
    epoch_n = num_iter // (num_examples // batch_size) + 1

    training_instances = data.train.labeled_ds.instances
    training_labels = data.train.labeled_ds.labels
    mean_accuracy = []
    mean_loss = []

    for start in trange(0, len(training_labels), batch_size):
        end = min(start + batch_size, len(training_labels))
        final_stats = sess.run(
            [accuracy, loss, clean_pred_cost, predictions],
            feed_dict={
                feedforward_inputs: training_instances[start:end],
                outputs: training_labels[start:end],
                autoencoder_inputs: unlabeled_instances,
                training: False
            })

        mean_accuracy.append(final_stats[0])
        mean_loss.append(final_stats[2])

        true_labels = np.argmax(training_labels[start:end], 1)
        for i in np.arange(true_labels.shape[0]):
            print("%s,training,%d,%.3g,%.3g,%.3g,%d,%d" %
                  (config["experiment_id"], epoch_n, final_stats[0],
                   final_stats[1], final_stats[2], true_labels[i],
                   final_stats[3][i]),
                  file=results_log)

    print("Final Accuracy for Training Data: %.3g" % np.mean(mean_accuracy),
          file=sys.stderr)
    print("Final Supervised Cost for Training Data: %.3g" % np.mean(mean_loss),
          file=sys.stderr)

    # For validation data we traverse in batches and save all the information
    validation_instances = data.validation.instances
    validation_labels = data.validation.labels
    mean_accuracy = []
    mean_loss = []

    for start in trange(0, len(validation_labels), batch_size):
        end = min(start + batch_size, len(validation_labels))
        final_stats = sess.run(
            [accuracy, loss, clean_pred_cost, predictions],
            feed_dict={
                feedforward_inputs: validation_instances[start:end],
                outputs: validation_labels[start:end],
                autoencoder_inputs: unlabeled_instances,
                training: False
            })
        mean_accuracy.append(final_stats[0])
        mean_loss.append(final_stats[2])

        true_labels = np.argmax(validation_labels[start:end], 1)
        for i in np.arange(true_labels.shape[0]):
            print("%s,validation,%d,%.3g,%.3g,%.3g,%d,%d" %
                  (config["experiment_id"], epoch_n, final_stats[0],
                   final_stats[1], final_stats[2], true_labels[i],
                   final_stats[3][i]),
                  file=results_log)

    print("Final Accuracy for Validation Data: %.3g" % np.mean(mean_accuracy),
          file=sys.stderr)
    print("Final Supervised Cost for Validation Data: %.3g" %
          np.mean(mean_loss),
          file=sys.stderr)

    # TEST DATA

    test_instances = data.test.instances
    test_labels = data.test.labels

    for start in trange(0, len(test_labels), batch_size):
        end = min(start + batch_size, len(test_labels))
        final_stats = sess.run(
            [accuracy, loss, clean_pred_cost, predictions],
            feed_dict={
                feedforward_inputs: test_instances[start:end],
                outputs: test_labels[start:end],
                autoencoder_inputs: unlabeled_instances,
                training: False
            })

        true_labels = np.argmax(test_labels[start:end], 1)
        for i in np.arange(true_labels.shape[0]):
            print("%s,test,%d,%.3g,%.3g,%.3g,%d,%d" %
                  (config["experiment_id"], epoch_n, final_stats[0],
                   final_stats[1], final_stats[2], true_labels[i],
                   final_stats[3][i]),
                  file=results_log)

    print("=== Experiment finished ===", file=sys.stderr)
    sess.close()
    results_log.close()

    return
Esempio n. 53
0
def create_spixel(*args):
    """Build a ``SuperPixel`` from ``args``, tolerating invalid input.

    All positional arguments are forwarded unchanged to ``SuperPixel``.
    When the constructor raises ``ValueError`` the error text is reported
    through ``tqdm.write`` and ``None`` is returned instead of a pixel.
    """
    try:
        return SuperPixel(*args)
    except ValueError as exc:
        # Report the rejected super pixel and let the caller carry on.
        tqdm.write("Skipping SuperPixel. " + str(exc))
    return None
Esempio n. 54
0
def train_model(model, datasets, optimizer, criterion, num_epochs=30, batch_size=128,
                device=None, scheduler=None, out=None):
    """
    train a classifier and restore the weights with the best validation accuracy

    Every epoch runs a 'train' phase followed by a 'val' phase, logs the mean
    loss and accuracy of each, and (when `out` is given) saves the current
    weights. After the last epoch the best-on-validation weights are loaded
    back into `model`; if `datasets` has a "test" entry, that model is also
    evaluated on it.

    Parameters
    -----------------
    model: torch.nn.Module
        model to train (modified in place)

    datasets: dict
        maps 'train' and 'val' (optionally 'test') to a
        torch.utils.data.Dataset yielding (inputs, labels) pairs

    optimizer: torch.optim
        optimizer for model

    criterion: torch.nn.Module
        function that calculates loss; its result is treated as a
        per-batch mean (it is multiplied by the batch size when accumulated)

    num_epochs: int
        number of epochs

    batch_size: int
        number of batch size

    device: torch.device
        passed to `.to(device)` for every batch

    scheduler: learning-rate scheduler or None
        stepped once at the beginning of every training phase

    out: pathlib.Path, str or None
        represent output directory; per-epoch weights and "log.json" are
        written there. When None nothing is written to disk.

    Return
    -----------------------------
    model: torch.nn.Module
        best model
    """
    # Local import: the module-level imports are not visible from this block.
    from pathlib import Path

    if out is not None:
        out = Path(out)

    epochs = tqdm(range(num_epochs), desc="Epoch", unit='epoch')
    phases = ['train', 'val']
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    # construct dataloader (only the training split is shuffled)
    dataloader = {phase: torch.utils.data.DataLoader(datasets[phase], batch_size=batch_size,
                                                     shuffle=(phase == 'train'), num_workers=2)
                  for phase in phases}
    dataset_sizes = {phase: len(datasets[phase]) for phase in phases}
    # initialize log
    log = OrderedDict()
    # train loop
    since = datetime.datetime.now()
    for epoch in epochs:
        for phase in phases:
            is_train = phase == 'train'
            if is_train:
                # NOTE(review): recent PyTorch releases expect scheduler.step()
                # to be called after optimizer.step(); the original ordering is
                # kept here - confirm against the PyTorch version in use.
                if scheduler is not None:
                    scheduler.step()
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0
            # Iterate over data.
            iteration = tqdm(dataloader[phase],
                             desc="{} iteration".format(phase.capitalize()),
                             unit='iter')
            for inputs, labels in iteration:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()
                # forward
                # track history if only in train
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    # returns loss is mean_wise
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # statistics: undo the batch mean so that the epoch value is a
                # true per-sample average even when the last batch is smaller
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data).item()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]
            tqdm.write('Epoch: {:3d} Phase: {:>5}    Loss: {:.4f} Acc: {:.4f}'.format(
                epoch+1, phase.capitalize(), epoch_loss, epoch_acc))

            if is_train:
                # preserve train log
                log["epoch_{}".format(epoch+1)] = OrderedDict(train_loss=epoch_loss,
                                                              train_acc=epoch_acc)
            else:
                # preserve val log
                log["epoch_{}".format(epoch+1)].update(OrderedDict(val_loss=epoch_loss,
                                                                   val_acc=epoch_acc))
                if epoch_acc > best_acc:
                    # deep copy the model
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())

        # save model by epoch (skipped when no output directory was given)
        if out is not None:
            torch.save(model.state_dict(), out /
                       "model_{}epoch.pt".format(epoch+1))
        tqdm.write("-"*60)

    time_elapsed = datetime.datetime.now() - since
    tqdm.write('Training complete in {}'.format(time_elapsed))
    tqdm.write('Best val Acc: {:.4f}'.format(best_acc), end="\n\n")

    # load best model weights
    model.load_state_dict(best_model_wts)

    # if test set exists, calculate loss and accuracy for best model
    if "test" in datasets:
        model.eval()
        testloader = torch.utils.data.DataLoader(datasets["test"], batch_size=batch_size,
                                                 shuffle=False, num_workers=2)
        iteration = tqdm(testloader,
                         desc="Test iteration",
                         unit='iter')
        test_loss = 0.0
        test_corrects = 0
        with torch.no_grad():
            for inputs, labels in iteration:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                _, preds = torch.max(outputs.data, 1)
                # returns loss is mean_wise
                loss = criterion(outputs, labels)
                # statistics
                test_loss += loss.item() * inputs.size(0)
                test_corrects += torch.sum(preds == labels.data).item()

        test_loss = test_loss / len(datasets["test"])
        test_acc = test_corrects / len(datasets["test"])
        tqdm.write('Phase: {} Loss: {:.4f} Acc: {:.4f}'.format(
            "Test", test_loss, test_acc), end="\n\n")
        # preserve test log
        log['test'] = OrderedDict(test_loss=test_loss,
                                  test_acc=test_acc)

    # save log (skipped when no output directory was given)
    if out is not None:
        with open(out / "log.json", "w") as f:
            json.dump(log, f, indent=4, separators=(',', ': '))

    return model
Esempio n. 55
0
    def test_loop(
        self,
        test_loader,
        verbose=False,
        normalizer=None,
        return_all=False,
        debug=False,
        debug_dir=None,
    ):
        """
        Evaluate the model on every episode of ``test_loader``.

        :param test_loader: iterable with a length, yielding
            ``(x, target, lang)`` triples; only ``x`` is used here
        :param verbose: if set, wrap the loop in a tqdm progress bar
        :param normalizer: optional callable applied to each item of ``x``
            after its first two dimensions are flattened together; the
            normalized copy is what gets evaluated, while the untouched ``x``
            is passed on as ``x_orig`` when ``self.l3`` is set
        :param return_all: return the per-episode accuracies instead of the
            summary statistics
        :param debug: forwarded to ``self.correct_l3`` (only when ``self.l3``)
        :param debug_dir: forwarded to ``self.correct_l3`` (only when
            ``self.l3``)

        :returns: an ``(acc_mean, loss_mean)`` tuple, or, when ``return_all``
            is set, an np.array with one accuracy percentage per episode

        Side effects: ``self.n_query`` is reassigned for every episode and a
        summary line is always emitted through ``tqdm.write``.
        """
        n_episodes = len(test_loader)
        episodes = enumerate(test_loader)
        if verbose:
            episodes = tqdm(episodes, desc="test", total=n_episodes)

        accuracies = []
        losses = []
        for index, (x, target, lang) in episodes:
            if normalizer is None:
                x_eval = x
            else:
                # Merge the two leading dimensions so the normalizer sees one
                # item at a time, then restore the original layout.
                shape = x.shape
                flat = x.clone().view(shape[0] * shape[1], *shape[2:])
                normalized = [normalizer(item) for item in flat]
                x_eval = torch.stack(normalized).view(*shape)

            self.n_query = x.size(1) - self.n_support

            if not self.l3:
                n_correct, n_total, loss = self.correct(
                    x_eval, return_loss=True)
            else:
                n_correct, n_total, loss = self.correct_l3(
                    x_eval,
                    return_loss=True,
                    debug=debug,
                    index=index,
                    x_orig=x,
                    debug_dir=debug_dir,
                )

            accuracies.append(n_correct / n_total * 100)
            losses.append(loss.item())

        accuracies = np.asarray(accuracies)
        losses = np.asarray(losses)
        mean_accuracy = np.mean(accuracies)
        mean_loss = np.mean(losses)
        # 1.96 * std / sqrt(n) is reported as the +- interval around the mean
        interval = 1.96 * np.std(accuracies) / np.sqrt(n_episodes)
        tqdm.write("%d Test Loss %f Acc = %4.2f%% +- %4.2f%%" %
                   (n_episodes, mean_loss, mean_accuracy, interval))

        if not return_all:
            return mean_accuracy, mean_loss
        return accuracies
Esempio n. 56
0
def _iter_metadata(**kwargs):
    """Yield ``(prefix, version, date, deprecated)`` for each metadata entry.

    Entries come from ``iter_helper_helper(get_metadata, **kwargs)``; the
    version in use is announced through ``tqdm.write`` before each tuple is
    yielded, and ``deprecated`` is ``bioregistry.is_deprecated(prefix)``.
    """
    for prefix, entry in iter_helper_helper(get_metadata, **kwargs):
        version = entry["version"]
        tqdm.write(f"[{prefix}] using version {version}")
        date = entry["date"]
        deprecated = bioregistry.is_deprecated(prefix)
        yield prefix, version, date, deprecated
def speech_length_histogram(
        sessions: Iterable[int],
        histogram_upper_bound: int = 50,
        metadata_of_interest: Set[str] = {'party', 'chamber', 'gender', 'state'},
        identities: Set[str] = {'Dem', 'GOP', 'Senate', 'House', 'Male', 'Female'}
        ) -> None:
    """Plot the distribution of speech lengths (in words) per identity.

    For every session the speaker map
    ``corpora/bound/<session>_SpeakerMap.txt`` and the speeches file
    ``corpora/bound/speeches_<session>.txt`` are read. Each speech whose id
    appears in the speaker map is counted under its speaker's state, party
    ('Dem'/'GOP'), chamber ('Senate'/'House') and gender ('Male'/'Female').
    One PDF per entry of ``identities`` is then saved under
    ``graphs/speech_length/``.

    Args:
        sessions: session numbers used to build the input file names.
        histogram_upper_bound: only speeches strictly shorter than this many
            words are plotted.
        metadata_of_interest: speaker-map columns kept for each speech; the
            body reads 'party', 'chamber', 'gender' and 'state', so all four
            must be included.
        identities: keys of the collected lengths to plot.

    Raises:
        ValueError: if an identity has no speech below the upper bound.
    """
    speeches_length: defaultdict[str, List[int]] = defaultdict(list)
    for session_index in tqdm(sessions):
        metadata: Dict[str, Dict[str, str]] = dict()
        metadata_path = f'corpora/bound/{session_index:0>3d}_SpeakerMap.txt'
        with open(metadata_path) as metadata_file:
            reader = csv.DictReader(metadata_file, delimiter='|')
            for speaker_data in reader:
                if speaker_data['nonvoting'] == 'nonvoting':
                    continue
                speaker: Dict[str, str] = {
                    attribute: speaker_data[attribute]
                    for attribute in metadata_of_interest}
                metadata[speaker_data['speech_id']] = speaker

        speech_count = 0
        missing_metadata_count = 0
        corpus_path = f'corpora/bound/speeches_{session_index:0>3d}.txt'
        with open(corpus_path, encoding=input_encoding) as corpus_file:
            corpus_file.readline()  # discard header line
            for line in corpus_file:
                # Only the unpacking can raise ValueError (a line that does
                # not have exactly two '|'-separated fields); such lines are
                # skipped and not counted as speeches.
                try:
                    speech_id, speech = line.split('|')
                except ValueError:
                    continue

                speech_count += 1
                if speech_id not in metadata:
                    missing_metadata_count += 1
                    continue

                speaker = metadata[speech_id]
                party = speaker['party']
                chamber = speaker['chamber']
                gender = speaker['gender']
                state = speaker['state']

                speech_length = len(speech.split())

                speeches_length[state].append(speech_length)
                # Parties other than 'D' and 'R' are counted under their
                # state only.
                if party == 'D':
                    speeches_length['Dem'].append(speech_length)
                elif party == 'R':
                    speeches_length['GOP'].append(speech_length)

                if chamber == 'S':
                    speeches_length['Senate'].append(speech_length)
                elif chamber == 'H':
                    speeches_length['House'].append(speech_length)
                else:
                    print('Bicameralism is bad enough:', chamber)

                if gender == 'M':
                    speeches_length['Male'].append(speech_length)
                elif gender == 'F':
                    speeches_length['Female'].append(speech_length)
                else:
                    print('Nonbinary:')

        # A corpus file with no parseable speech would otherwise divide by
        # zero; report it as having nothing missing.
        if speech_count:
            missing_metadata_ratio = missing_metadata_count / speech_count
        else:
            missing_metadata_ratio = 0.0
        tqdm.write(f'{missing_metadata_ratio:.2%} speeches in {corpus_path} '
                   'are missing metadata and excluded from the output corpus.')

    for metadata_name in identities:
        bounded_lengths = [length for length in speeches_length[metadata_name]
                           if length < histogram_upper_bound]

        if len(bounded_lengths) == 0:
            raise ValueError(f'{metadata_name} is empty?')

        fig, ax = plt.subplots()
        try:
            # NOTE(review): sns.distplot is deprecated in recent seaborn
            # releases - consider histplot/displot once the minimum seaborn
            # version is confirmed.
            ax = sns.distplot(bounded_lengths, label=metadata_name)
            ax.legend()
            fig.savefig(f'graphs/speech_length/{metadata_name}.pdf')
        finally:
            # pyplot keeps every figure alive until it is closed explicitly.
            plt.close(fig)
Esempio n. 58
0
def run(run_obj):
    """
    Function to run FastSinkSource, FastSinkSourcePlus, Local and LocalPlus

    *run_obj*: runner object. The attributes read here are ``params_results``,
        ``P``, ``name``, ``params``, ``ann_matrix``, ``ann_obj``, ``net_obj``,
        ``target_prots``, ``terms_to_run``, ``kwargs`` and ``params_str``.
        ``run_obj.terms_to_run`` holds the terms for which to run the method;
        each must be a key of ``run_obj.ann_obj.term2idx``.

    On return ``run_obj.term_scores`` holds one row of scores per term that
    was run, and ``run_obj.params_results`` has the accumulated timings.

    Raises ValueError if ``run_obj.name`` is not one of the supported
    algorithms (previously this surfaced as an unbound-variable error).
    """
    params_results = run_obj.params_results
    P, alg, params = run_obj.P, run_obj.name, run_obj.params

    #if 'solver' in params:
    # make sure the term_scores matrix is reset
    # because if it isn't empty, overwriting the stored scores seems to be time consuming
    # (builtin float instead of np.float: the alias was removed from NumPy)
    term_scores = sp.lil_matrix(run_obj.ann_matrix.shape, dtype=float)
    print("Running %s with these parameters: %s" % (alg, params))
    if len(run_obj.target_prots) != len(run_obj.net_obj.nodes):
        print("\tstoring scores for only %d target prots" %
              (len(run_obj.target_prots)))

    verbose = run_obj.kwargs.get('verbose', False)

    # run FastSinkSource on each term individually
    for term in tqdm(run_obj.terms_to_run):
        idx = run_obj.ann_obj.term2idx[term]
        # get the row corresponding to the current terms annotations
        y = run_obj.ann_matrix[idx, :]
        positives = (y > 0).nonzero()[1]
        negatives = (y < 0).nonzero()[1]
        # if this method uses positive examples only, then remove the negative examples
        if alg in ["fastsinksourceplus", "sinksourceplus", "localplus"]:
            negatives = None

        if run_obj.net_obj.weight_gmw is True:
            start_time = time.process_time()
            # weight the network for each term individually
            W, _, _ = run_obj.net_obj.weight_GMW(y.toarray()[0], term)
            P = alg_utils.normalizeGraphEdgeWeights(
                W, ss_lambda=params.get('lambda'))
            params_results['%s_weight_time' %
                           (alg)] += time.process_time() - start_time

        # now actually run the algorithm
        if alg in [
                "fastsinksource", "fastsinksourceplus", "sinksource",
                "sinksourceplus"
        ]:
            a, eps, max_iters = params['alpha'], float(
                params['eps']), params['max_iters']
            # if a solver is given, it will be used. Otherwise it will use regular power iteration
            solver = params.get('solver')
            tol = float(params['tol']) if 'tol' in params else 1e-5
            scores, process_time, wall_time, iters = fastsinksource.runFastSinkSource(
                P,
                positives,
                negatives=negatives,
                max_iters=max_iters,
                eps=eps,
                a=a,
                tol=tol,
                solver=solver,
                verbose=verbose)
        elif alg in ["local", "localplus"]:
            scores, process_time, wall_time = fastsinksource.runLocal(
                P, positives, negatives=negatives)
            iters = 1
        else:
            # Without this branch `scores` would be unbound below.
            raise ValueError("Unrecognized algorithm: %s" % alg)

        if verbose is True:
            tqdm.write("\t%s converged after %d iterations " % (alg, iters) +
                       "(%0.4f sec) for %s" % (process_time, term))

        # limit the scores to the target nodes
        if len(run_obj.target_prots) != len(scores):
            # True marks a non-target node whose score is zeroed out
            # (builtin bool instead of np.bool: the alias was removed from NumPy)
            mask = np.ones(len(scores), bool)
            mask[run_obj.target_prots] = False
            scores[mask] = 0
        # 0s are not explicitly stored in lil matrix
        term_scores[idx] = scores

        # also keep track of the time it takes for each of the parameter sets
        alg_name = "%s%s" % (alg, run_obj.params_str)
        params_results["%s_wall_time" % alg_name] += wall_time
        params_results["%s_process_time" % alg_name] += process_time

    run_obj.term_scores = term_scores
    run_obj.params_results = params_results
    return
Esempio n. 59
0
 def write(cls, msg):
     """Forward ``msg`` to ``tqdm.write`` without appending a line ending."""
     tqdm.write(msg, end="")
Esempio n. 60
0
def main():
    """Train and validate the DPN-92 classifier configured by the global ``args``.

    Steps, in order:
      1. Create ``args.save_path`` and load the class metadata and the file
         table for ``args.dataset`` through ``GenericDataset``.
      2. Build the ``kmodels.dpn92`` network and the per-model output
         directory ``<save_path>/<dataset>/<model name>/``.
      3. Open ``seed_<manualSeed>.txt`` in that directory as the run log; the
         file is closed on every exit path (normal end, evaluate-only return,
         or exception).
      4. Split the file table into train/validation sets and wrap them in
         ``DataLoader`` objects.
      5. If ``args.evaluate`` is set, run ``validate`` once and return.
      6. Otherwise, for each epoch: adjust the learning rate, train, validate,
         update the recorder and re-plot the curves. Whenever validation
         accuracy exceeds 95, also write test predictions, the model weights
         and a checkpoint.

    Raises:
        ValueError: if ``args.dataset`` is not one of ``"seedlings"``,
            ``"bone"`` or ``"ISIC2017"``.
    """
    # Local import: only this function needs it.
    import datetime

    # Init output directory
    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)
    print('Dataset: {}'.format(args.dataset.upper()))

    if args.dataset in ("seedlings", "bone"):
        classes, class_to_idx, num_to_class, df = GenericDataset.find_classes(
            args.data_path)
    elif args.dataset == "ISIC2017":
        classes, class_to_idx, num_to_class, df = \
            GenericDataset.find_classes_melanoma(args.data_path)
    else:
        # Fail early with a clear message instead of hitting an unbound
        # local further down.
        raise ValueError(
            "Unsupported dataset '{}': expected 'seedlings', 'bone' or "
            "'ISIC2017'".format(args.dataset))

    args.num_classes = len(classes)

    # Init model
    net = kmodels.dpn92(num_classes=args.num_classes)
    real_model_name = type(net).__name__
    print("=> Creating model '{}'".format(real_model_name))

    # strftime expands the date/time directives appended after the
    # model/dataset prefix.
    exp_name = datetime.datetime.now().strftime(real_model_name + '_' +
                                                args.dataset +
                                                '_%Y-%m-%d_%H-%M-%S')
    print('Training ' + real_model_name +
          ' on {} dataset:'.format(args.dataset.upper()))

    mPath = args.save_path + '/' + args.dataset + '/' + real_model_name + '/'
    args.save_path_model = mPath
    if not os.path.isdir(args.save_path_model):
        os.makedirs(args.save_path_model)

    log_path = os.path.join(mPath, 'seed_{}.txt'.format(args.manualSeed))
    # The context manager guarantees the log is closed on the evaluate-only
    # early return and on exceptions, not just at the end of training.
    with open(log_path, 'w') as log:
        print_log('save path : {}'.format(args.save_path), log)
        state = dict(args._get_kwargs())
        print_log(state, log)
        print("Random Seed: {}".format(args.manualSeed))
        print("python version : {}".format(sys.version.replace('\n', ' ')))
        print("torch  version : {}".format(torch.__version__))
        print("cudnn  version : {}".format(torch.backends.cudnn.version()))

        # Init dataset
        if not os.path.isdir(args.data_path):
            os.makedirs(args.data_path)
        # Built but currently not inserted into any transform pipeline below.
        normalize_img = torchvision.transforms.Normalize(
            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

        # NOTE(review): RandomSizedCrop and Scale are deprecated names in
        # later torchvision releases (RandomResizedCrop / Resize) - confirm
        # the installed version before switching.
        train_trans = transforms.Compose([
            transforms.RandomSizedCrop(args.img_scale),
            PowerPIL(),
            transforms.ToTensor(),
            RandomErasing()
        ])

        valid_trans = transforms.Compose([
            transforms.Scale(256),
            transforms.CenterCrop(args.img_scale),
            transforms.ToTensor(),
        ])

        test_trans = valid_trans

        # Despite its name, validationRatio is used as the fraction of rows
        # sampled for *training*; the rows not sampled form the validation set.
        train_data = df.sample(frac=args.validationRatio)
        valid_data = df[~df['file'].isin(train_data['file'])]

        train_set = GenericDataset(train_data,
                                   args.data_path,
                                   transform=train_trans)
        valid_set = GenericDataset(valid_data,
                                   args.data_path,
                                   transform=valid_trans)

        t_loader = DataLoader(train_set,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=0)
        v_loader = DataLoader(valid_set,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=0)

        dataset_sizes = {
            'train': len(t_loader.dataset),
            'valid': len(v_loader.dataset)
        }
        print(dataset_sizes)

        criterion = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.SGD(net.parameters(),
                                    state['learning_rate'],
                                    momentum=state['momentum'],
                                    weight_decay=state['decay'],
                                    nesterov=True)

        if args.use_cuda:
            net.cuda()
            criterion.cuda()

        recorder = RecorderMeter(args.epochs)

        # Evaluate-only mode: one validation pass, no training.
        if args.evaluate:
            # NOTE(review): this call passes four positional arguments while
            # the per-epoch call below passes five (including ``epoch``) -
            # confirm against validate's signature.
            validate(v_loader, net, criterion, log)
            return
        if args.tensorboard:
            configure("./logs/runs/%s" % (exp_name))

        print('    Total params: %.2fM' %
              (sum(p.numel() for p in net.parameters()) / 1000000.0))

        # Main loop
        start_training_time = time.time()
        start_time = time.time()
        epoch_time = AverageMeter()
        for epoch in tqdm(range(args.start_epoch, args.epochs)):
            current_learning_rate = adjust_learning_rate(
                optimizer, epoch, args.gammas, args.schedule)
            # Remaining-time estimate: average epoch duration times the
            # number of epochs left.
            need_hour, need_mins, need_secs = convert_secs2time(
                epoch_time.avg * (args.epochs - epoch))
            need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(
                need_hour, need_mins, need_secs)
            print_log(
                '\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:6.4f}]'
                .format(time_string(), epoch, args.epochs, need_time,
                        current_learning_rate) +
                ' [Best : Accuracy={:.2f}, Error={:.2f}]'.format(
                    recorder.max_accuracy(False),
                    100 - recorder.max_accuracy(False)), log)

            # The second positional argument of tqdm.write is the output
            # file, so this banner is written to the log file.
            # NOTE(review): confirm that writing to the log rather than the
            # console is intended.
            tqdm.write(
                '\n==>>Epoch=[{:03d}/{:03d}]], {:s}, LR=[{}], Batch=[{}]'.format(
                    epoch, args.epochs, time_string(), state['learning_rate'],
                    args.batch_size) +
                ' [Model={}]'.format(type(net).__name__), log)

            # train for one epoch, then validate
            train_acc, train_los = train(t_loader, net, criterion, optimizer,
                                         epoch, log)
            val_acc, val_los = validate(v_loader, net, criterion, epoch, log)
            is_best = recorder.update(epoch, train_los, train_acc, val_los,
                                      val_acc)

            # measure elapsed time
            epoch_time.update(time.time() - start_time)
            start_time = time.time()
            training_time = time.time() - start_training_time
            recorder.plot_curve(
                os.path.join(mPath, real_model_name + '_' + exp_name + '.png'),
                training_time, net, real_model_name, dataset_sizes,
                args.batch_size, args.learning_rate, args.dataset,
                args.manualSeed, args.num_classes)

            if float(val_acc) > 95.0:
                # NOTE(review): despite the message, the loop is not broken
                # here; training continues and artifacts are written again on
                # every later epoch above the threshold - confirm intent.
                print("*** EARLY STOP ***")
                df_pred = testSeedlingsModel(args.test_data_path, net,
                                             num_to_class, test_trans)
                # Shared "<val_acc>_<val_loss>_<epoch>" tag for all artifacts
                # written for this epoch.
                run_tag = str(val_acc) + '_' + str(val_los) + "_" + str(epoch)
                model_save_path = os.path.join(
                    mPath, real_model_name + '_' + run_tag)

                df_pred.to_csv(model_save_path + "_sub.csv",
                               columns=('file', 'species'),
                               index=None)
                torch.save(net.state_dict(), model_save_path + '_.pth')

                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'state_dict': net.state_dict(),
                        'recorder': recorder,
                        'optimizer': optimizer.state_dict(),
                    },
                    is_best,
                    mPath,
                    run_tag + '_checkpoint.pth.tar')