Example #1
def getJson(*parts, method='get', postData=None):
    '''Submit a request and return the response for testing'''
    # construct the URL
    path = root + makeURL(*parts)
    # start the request object
    request = urllib.request.Request(path, method=method)
    # if we have data to post encode it properly
    if postData:
        # add the content header to signal json is coming
        request.add_header('Content-Type', 'application/json; charset=utf-8')
        # encode as json and then as utf-8 bytes
        postData = json.dumps(postData).encode('utf-8')
        # tell it the length of the data
        request.add_header('Content-Length', len(postData))

    result = None
    try:
        with urllib.request.urlopen(request, postData) as fp:
            code = fp.getcode()
            if code == 200:
                data = fp.read().decode('utf-8')
                result = json.loads(data)
    except urllib.error.HTTPError as e:
        code = e.code

    print(f'http response = {code}')
    if result:
        pprint(result, max_seq_length=8)
    return result or code
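
A minimal usage sketch (the endpoint names are hypothetical; it assumes root, makeURL, urllib.request, json, and pprint are defined in the surrounding test module):

# GET {root}/widgets and pretty-print the decoded JSON
result = getJson('widgets')

# POST a JSON body to {root}/widgets
result = getJson('widgets', method='post', postData={'name': 'example'})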
Example #2
    def _ipython_display_(self, **kwargs):
        widget = self._widget()
        if widget is not None:
            return widget._ipython_display_(**kwargs)
        from IPython.lib.pretty import pprint

        pprint(self)
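
For context, IPython.lib.pretty.pprint dispatches to a _repr_pretty_ hook when an object defines one; a minimal sketch of such a hook (the Point class is made up for illustration):

from IPython.lib.pretty import pprint

class Point:
    def __init__(self, x, y):
        self.x, self.y = x, y

    def _repr_pretty_(self, p, cycle):
        # p is a RepresentationPrinter; cycle is True if a reference cycle is detected
        p.text('Point(...)' if cycle else f'Point(x={self.x}, y={self.y})')

pprint(Point(1, 2))  # -> Point(x=1, y=2)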
Example #3
def main(_config, _run: sacred.run.Run):
    """
    """
    sacred.commands.print_config(_run)

    model = Model.from_config(_config['model'])

    print('Model config')
    pprint(model.config)
Example #4
def pprint(*args, **kwargs):
    """
    Pretty-print a Python object using ``IPython.lib.pretty.pprint``.
    Fallback to ``pprint.pprint`` if IPython is not available.
    """
    try:
        from IPython.lib.pretty import pprint
    except ImportError:
        from pprint import pprint
    pprint(*args, **kwargs)
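
A quick check of the wrapper, making no assumption about whether IPython is installed; either backend accepts a plain object:

# Pretty-prints with IPython.lib.pretty if available, else with the stdlib pprint
pprint({'numbers': list(range(5)), 'nested': {'a': 1, 'b': [2, 3]}})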
Example #5
def test_ipython_pprint():
    from io import StringIO
    eqs = Equations("""dv/dt = -(v + I)/ tau : volt (unless refractory)
                       I = sin(2 * 22/7. * f * t)* volt : volt
                       f : Hz""")
    # Test ipython's pretty printing
    old_stdout = sys.stdout
    string_output = StringIO()
    sys.stdout = string_output
    pprint(eqs)
    assert len(string_output.getvalue()) > 0
    sys.stdout = old_stdout
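
The manual sys.stdout swap above is not exception-safe; a hedged variant of the same assertion using contextlib.redirect_stdout (assuming the same eqs object):

import contextlib
from io import StringIO

string_output = StringIO()
with contextlib.redirect_stdout(string_output):
    pprint(eqs)
assert len(string_output.getvalue()) > 0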
Example #6
def test_ipython_pprint():
    if pprint is None:
        raise SkipTest('ipython is not available')
    eqs = Equations('''dv/dt = -(v + I)/ tau : volt (unless refractory)
                       I = sin(2 * 22/7. * f * t)* volt : volt
                       f : Hz''')
    # Test ipython's pretty printing
    old_stdout = sys.stdout
    string_output = StringIO()
    sys.stdout = string_output
    pprint(eqs)
    assert len(string_output.getvalue()) > 0
    sys.stdout = old_stdout
Example #8
def main(_config, _run: sacred.run.Run):
    """
    python parametized.py print_config
    python parametized.py print_config with model.kwargs.encoder.cls=RecurrentEncoder model.kwargs.vae_param=10
    """
    from IPython.lib.pretty import pprint
    sacred.commands.print_config(_run)

    model = VAE.from_config(_config['model'])

    print('Model config')
    pprint(model.config)
    print('Encoder config')
    pprint(model.encoder)
Example #9
def main(argv=None):
    """
    psd-tools command line utility.

    Usage:
        psd-tools export <input_file> <output_file> [options]
        psd-tools show <input_file> [options]
        psd-tools debug <input_file> [options]
        psd-tools -h | --help
        psd-tools --version

    Options:
        -v --verbose                Be more verbose.

    Example:
        psd-tools show example.psd  # Show the file content
        psd-tools export example.psd example.png  # Export as PNG
        psd-tools export example.psd[0] example-0.png  # Export layer as PNG
    """

    args = docopt.docopt(main.__doc__, version=__version__, argv=argv)

    if args['--verbose']:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    if args['export']:
        input_parts = args['<input_file>'].split('[')
        input_file = input_parts[0]
        if len(input_parts) > 1:
            indices = [int(x.rstrip(']')) for x in input_parts[1:]]
        else:
            indices = []
        layer = PSDImage.open(input_file)
        for index in indices:
            layer = layer[index]
        if isinstance(layer, PSDImage) and layer.has_preview():
            image = layer.topil()
        else:
            image = layer.compose()
        image.save(args['<output_file>'])

    elif args['show']:
        psd = PSDImage.open(args['<input_file>'])
        pprint(psd)

    elif args['debug']:
        psd = PSDImage.open(args['<input_file>'])
        pprint(psd._record)
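
For reference, the bracket syntax accepted by the export command is parsed into a list of integer indices; a standalone illustration of that parsing (the file name is hypothetical):

spec = 'example.psd[0][3]'
parts = spec.split('[')
input_file = parts[0]                                 # 'example.psd'
indices = [int(x.rstrip(']')) for x in parts[1:]]     # [0, 3]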
Example #10
def test_ipython_pprint():
    try:
        from cStringIO import StringIO  # Python 2
    except ImportError:
        from io import StringIO  # Python 3
    eqs = Equations('''dv/dt = -(v + I)/ tau : volt (unless refractory)
                       I = sin(2 * 22/7. * f * t)* volt : volt
                       f : Hz''')
    # Test ipython's pretty printing
    old_stdout = sys.stdout
    string_output = StringIO()
    sys.stdout = string_output
    pprint(eqs)
    assert len(string_output.getvalue()) > 0
    sys.stdout = old_stdout
Example #11
def search_bar(request):
    searchText = request.GET['text']

    response_data = dict()
    response_data['number_of_results'] = 0
    response_data['results'] = []
    if searchText.strip() == "":
        print "NO STRING"
        return HttpResponse(json.dumps(response_data), content_type="application/json")

    members = Member.objects.filter(name__contains=searchText, is_current=True)
    for member in members:
        newResult = dict()
        newResult['id'] = member.id
        newResult['name'] = member.name
        newResult['party'] = member.current_party.name
        newResult['type'] = "member"
        response_data['results'].append(newResult)
        response_data['number_of_results'] += 1

    tags = Tag.objects.filter(name__contains=searchText)
    for tag in tags:
        newResult = dict()
        newResult['id'] = tag.id
        newResult['name'] = tag.name
        newResult['type'] = "tag"
        response_data['results'].append(newResult)
        response_data['number_of_results'] += 1

    parties = Party.objects.filter(name__contains=searchText, knesset__number=CURRENT_KNESSET_NUMBER)
    for party in parties:
        newResult = dict()
        newResult['id'] = party.id
        newResult['name'] = party.name
        newResult['type'] = "party"
        response_data['results'].append(newResult)
        response_data['number_of_results'] += 1

    print('number of results:', response_data['number_of_results'])
    pprint(response_data)
    return HttpResponse(json.dumps(response_data), content_type="application/json")
Example #12
def test_str_repr():
    '''
    Test the string representation (only that it does not throw errors).
    '''
    tau = 10 * ms
    eqs = Equations('''dv/dt = -(v + I)/ tau : volt (unless-refractory)
                       I = sin(2 * 22/7. * f * t)* volt : volt
                       f : Hz''')
    assert len(str(eqs)) > 0
    assert len(repr(eqs)) > 0

    # Test str and repr of SingleEquations explicitly (might already have been
    # called by Equations)
    for eq in eqs.itervalues():
        assert (len(str(eq))) > 0
        assert (len(repr(eq))) > 0

    # Test ipython's pretty printing
    old_stdout = sys.stdout
    string_output = StringIO()
    sys.stdout = string_output
    pprint(eqs)
    assert len(string_output.getvalue()) > 0
    sys.stdout = old_stdout
Example #13
def test_str_repr():
    '''
    Test the string representation (only that it does not throw errors).
    '''
    tau = 10 * ms
    eqs = Equations('''dv/dt = -(v + I)/ tau : volt (active)
                       I = sin(2 * 22/7. * f * t)* volt : volt
                       f : Hz''')
    assert len(str(eqs)) > 0
    assert len(repr(eqs)) > 0
    
    # Test str and repr of SingleEquations explicitly (might already have been
    # called by Equations)
    for eq in eqs.equations.itervalues():
        assert(len(str(eq))) > 0
        assert(len(repr(eq))) > 0
    
    # Test ipython's pretty printing
    old_stdout = sys.stdout
    string_output = StringIO()
    sys.stdout = string_output
    pprint(eqs)
    assert len(string_output.getvalue()) > 0
    sys.stdout = old_stdout
Example #14
def pretty_print_options(options):
    l = nested_dict_to_list_of_tuples(options)
    pprint(l)
Example #15
def scrape():

    cwd = os.getcwd()

    ApiKey = 'eMpdHL6BBBPdurOf5a9PbX276bW4lcSgcn1Nltgo'

    #url = 'https://api.nasa.gov/insight_weather/?api_key=eMpdHL6BBBPdurOf5a9PbX276bW4lcSgcn1Nltgo&feedtype=json&ver=1.0'

    #url = 'https://api.nasa.gov/DONKI/notifications?startDate=2014-05-01&endDate=2014-05-08&type=all&api_key=eMpdHL6BBBPdurOf5a9PbX276bW4lcSgcn1Nltgo'
    url = 'https://exoplanetarchive.ipac.caltech.edu/cgi-bin/nstedAPI/nph-nstedAPI?&table=exoplanets&format=json&where=pl_kepflag=1'

    #url = 'https://api.nasa.gov/mars-photos/api/v1/rovers/curiosity/photos?sol=1000&page=2&api_key=eMpdHL6BBBPdurOf5a9PbX276bW4lcSgcn1Nltgo'

    response = requests.get(url).json()

    pprint("INSIDE ScRAPE")

    # open a file for writing

    outputfile = open(f'{cwd}\data\Data1.csv', 'w', newline='')

    pprint("INSIDE ScRAPE0")
    # create the csv writer object

    csvwriter = csv.writer(outputfile)

    count = 0

    for resp in response:

        if count == 0:

            header = resp.keys()

            csvwriter.writerow(header)

            count += 1

        csvwriter.writerow(resp.values())

    outputfile.close()

    pprint("INSIDE ScRAPE1")

    engine = create_engine('postgresql://*****:*****@localhost/NASA')
    connection = engine.connect()

    pprint("INSIDE ScRAPE2")

    conn = psycopg2.connect(
        "host='localhost' port='5432' dbname='NASA' user='******' password='******'"
    )

    pprint("INSIDE ScRAPE20")

    cur = conn.cursor()

    pprint("INSIDE ScRAPE201")

    cur.execute("truncate table kepler_system")

    pprint("INSIDE ScRAPE21")

    #f = open(r'C:\\bootcamp\VisualProject\Data1.csv', 'r')
    f = open(f'{cwd}\data\Data1.csv', 'r')
    cur.copy_from(f, "kepler_system", sep=',')
    f.close()

    pprint("INSIDE ScRAPE22")

    conn.commit()
    conn.close()
    # cur.execute("""Copy kepler_system from 'C:\\bootcamp\VisualProject\Data1.csv';""")
    # conn.commit()
    # conn.close()

    #cur1 = conn.cursor()

    pprint("INSIDE ScRAPE3")

    conn = psycopg2.connect(
        "host='localhost' port='5432' dbname='NASA' user='******' password='******'"
    )
    cur = conn.cursor()

    #sql = "copy (SELECT * FROM kepler_system ) TO 'C:\\bootcamp\VisualProject\DataForD3.csv' CSV  HEADER WITH CSV DELIMITER ',';"
    #sql = f"copy (SELECT * FROM kepler_small) TO '{cwd}\static\DataForD3.csv' CSV;"
    sql = f"copy (SELECT * FROM kepler_system  ) TO '{cwd}\static\DataForD3.csv' CSV;"

    pprint("INSIDE ScRAPE4")

    cur.execute(sql)
    cur.close()

    # f = open(r'C:\\bootcamp\VisualProject\DataForD3.csv', 'w')
    # cur.copy_from(f, "kepler_system", sep=',')
    # f.close()

    return "Finished"
Example #16
def pretty_print(point_tuple):
    """ Pretty print the tuple representation of the options. """
    pprint(tuple(zip(parameterNames, point_tuple)))
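
Assuming parameterNames is a module-level sequence of names matching the tuple's order (the names below are hypothetical), the zip pairs them up:

parameterNames = ('learning_rate', 'batch_size')
pretty_print((0.01, 32))
# prints something like (('learning_rate', 0.01), ('batch_size', 32))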
Example #17
def test_repr_pretty(fixture):
    fixture.__repr__()
    pprint(fixture)
Example #18
    '''path : pathSegments'''
    p[0] = IdNode(p[1])

def p_path_segments(p):
    '''pathSegments : pathSegments SEP ID
                    | ID
    '''
    if len(p) == 4:
        seg = p[1]
        seg.push(p[3])
    else:
        seg = [p[1]]
    p[0] = seg

def compile(template):
    lex.lex()
    # lex.lexer.push_state('mu')
    lex.input(template)
    while True:
        tok = lex.token()
        if not tok:
            break
        print(tok)
    yacc.yacc()
    return yacc.parse(template)

if __name__ == '__main__':
    # print compile("test")
    x = compile("{{#if nothing}}Scratchpad{{else}}{{# complicated}}{{> partial}}{{/complicated}}{{/if}}")
    # print "%s\n%r" % (x, x)
    pretty.pprint(x, max_width=60)
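
max_width caps the column at which IPython.lib.pretty wraps its output; a small illustration with a plain nested list standing in for the parse tree:

from IPython.lib import pretty

tree = [['segment'] * 4] * 3
pretty.pprint(tree, max_width=30)          # wraps the nested list at ~30 columns
text = pretty.pretty(tree, max_width=30)   # same rendering, returned as a string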
            [
                'python',
                json2text,
                '--mictype=worn',
                file,
            ],
            stderr=None,
        ).stdout

        for line in kaldi_transcriptions.strip().split('\n'):
            example_id, transcription = map(str.strip,
                                            (line + ' ').split(' ',
                                                               maxsplit=1))

            example_id_split = example_id.split('_')
            speaker_id, session_id, remaining = example_id_split
            location, start, end = remaining.split('-')
            example_id = f'{speaker_id}_{session_id}_{start}-{end}'

            assert example_id not in all_kaldi_transcriptions, (
                example_id, all_kaldi_transcriptions)

            all_kaldi_transcriptions[example_id] = transcription

    return all_kaldi_transcriptions


if __name__ == '__main__':
    pprint(list(get_kaldi_transcriptions().items())[:5])
    pprint(list(get_kaldi_transcriptions(None).items())[:5])
def test_single_model():
    if sys.platform.startswith('win'):
        pytest.skip(
            'this doctest does not work on Windows, '
            'training is not possible on Windows due to symlinks being unavailable'
        )

    tr_dataset, dt_dataset = get_dataset()
    tr_dataset = tr_dataset[:2]
    dt_dataset = dt_dataset[:2]

    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_dir = Path(tmp_dir)

        config = pt.Trainer.get_config(updates=pb.utils.nested.deflatten(
            {
                'model.factory': Model,
                'storage_dir': str(tmp_dir),
                'stop_trigger': (2, 'epoch'),
                'summary_trigger': (3, 'iteration'),
                'checkpoint_trigger': (2, 'iteration')
            }))

        t = pt.Trainer.from_config(config)
        pre_state_dict = copy.deepcopy(t.state_dict())

        files_before = tuple(tmp_dir.glob('*'))
        if len(files_before) != 0:
            # no event file
            raise Exception(files_before)

        t.register_validation_hook(validation_iterator=dt_dataset,
                                   max_checkpoints=None)

        # Wrap each trigger in each hook with TriggerMock.
        log_list = []
        for hook in t.hooks:
            for k, v in list(hook.__dict__.items()):
                if isinstance(v, pt.train.trigger.Trigger):
                    hook.__dict__[k] = TriggerMock(v, log_list)
        t.train(train_dataset=tr_dataset, resume=False)

        hook_calls = ('\n'.join(log_list))

        # CheckpointedValidationHook trigger is called two times
        #   (once for checkpointing, once for validation)

        hook_calls_ref = textwrap.dedent('''
        I:0, E: 0, True, SummaryHook.pre_step
        I:0, E: 0, True, BackOffValidationHook.pre_step
        I:0, E: 0, True, CheckpointHook.pre_step
        I:0, E: 0, False, StopTrainingHook.pre_step
        I:1, E: 0, False, SummaryHook.pre_step
        I:1, E: 0, False, BackOffValidationHook.pre_step
        I:1, E: 0, False, CheckpointHook.pre_step
        I:1, E: 0, False, StopTrainingHook.pre_step
        I:2, E: 1, False, SummaryHook.pre_step
        I:2, E: 1, True, BackOffValidationHook.pre_step
        I:2, E: 1, True, CheckpointHook.pre_step
        I:2, E: 1, False, StopTrainingHook.pre_step
        I:3, E: 1, True, SummaryHook.pre_step
        I:3, E: 1, False, BackOffValidationHook.pre_step
        I:3, E: 1, False, CheckpointHook.pre_step
        I:3, E: 1, False, StopTrainingHook.pre_step
        I:4, E: 2, False, SummaryHook.pre_step
        I:4, E: 2, True, BackOffValidationHook.pre_step
        I:4, E: 2, True, CheckpointHook.pre_step
        I:4, E: 2, True, StopTrainingHook.pre_step
        ''').strip()

        print('#' * 80)
        print(hook_calls)
        print('#' * 80)

        if hook_calls != hook_calls_ref:
            import difflib
            raise AssertionError('\n' + ('\n'.join(
                difflib.ndiff(
                    hook_calls_ref.splitlines(),
                    hook_calls.splitlines(),
                ))))

        old_event_files = []

        files_after = tuple(tmp_dir.glob('*'))
        assert len(files_after) == 2, files_after
        for file in sorted(files_after):
            if 'tfevents' in file.name:
                old_event_files.append(file)
                events = list(load_events_as_dict(file))

                tags = []
                # time_rel_data_loading = []
                # time_rel_train_step = []
                time_per_iteration = []

                relative_timings = collections.defaultdict(list)
                relative_timing_keys = {
                    'training_timings/time_rel_data_loading',
                    'training_timings/time_rel_to_device',
                    'training_timings/time_rel_forward',
                    'training_timings/time_rel_review',
                    'training_timings/time_rel_backward',
                    'training_timings/time_rel_optimize',
                }
                for event in events:
                    if 'summary' in event.keys():
                        value, = event['summary']['value']
                        tags.append(value['tag'])
                        if value['tag'] in relative_timing_keys:
                            relative_timings[value['tag']].append(
                                value['simple_value'])
                        elif value[
                                'tag'] == 'training_timings/time_per_iteration':
                            time_per_iteration.append(value['simple_value'])

                c = dict(collections.Counter(tags))
                # Training summary is written two times (at iteration 3 when
                #   summary_trigger triggers and when training stops and
                #   summary_hook is closed).
                # Validation summary is written when checkpoint_trigger
                #   triggers, hence 3 times.
                #   non_validation_time can only be measured between
                #   validations => 2 values (one fewer than validation_time)
                expect = {
                    'training/grad_norm': 2,
                    'training/grad_norm_': 2,
                    'training/loss': 2,
                    'training/lr/param_group_0': 2,
                    'training_timings/time_per_iteration': 2,
                    'training_timings/time_rel_to_device': 2,
                    'training_timings/time_rel_forward': 2,
                    'training_timings/time_rel_review': 2,
                    'training_timings/time_rel_backward': 2,
                    'training_timings/time_rel_optimize': 2,
                    'training_timings/time_rel_data_loading': 2,
                    # 'training_timings/time_rel_step': 2,
                    'validation/loss': 3,
                    'validation_timings/time_per_iteration': 3,
                    'validation_timings/time_rel_to_device': 3,
                    'validation_timings/time_rel_forward': 3,
                    'validation_timings/time_rel_review': 3,
                    'validation_timings/time_rel_data_loading': 3,
                    # 'validation_timings/time_rel_step': 3,
                    # non validation time can only be measured between
                    # validations:
                    #  => # of non_val_time - 1 == # of val_time
                    'validation_timings/non_validation_time': 2,
                    'validation_timings/validation_time': 3,
                }
                pprint(c)
                if c != expect:
                    import difflib

                    raise AssertionError('\n' + ('\n'.join(
                        difflib.ndiff(
                            [
                                f'{k!r}: {v!r}'
                                for k, v in sorted(expect.items())
                            ],
                            [f'{k!r}: {v!r}' for k, v in sorted(c.items())],
                        ))))
                assert len(events) == 46, (len(events), events)

                assert relative_timing_keys == set(
                    relative_timings.keys()), (relative_timing_keys,
                                               relative_timings)

                for k, v in relative_timings.items():
                    assert len(v) > 0, (k, v, relative_timings)

                # The relative timings should sum up to one,
                # but this model is really cheap.
                # e.g. 0.00108 and 0.000604 per iteration.
                # This may cause the mismatch.
                # Allow a calculation error of 25%.
                # ToDo: Get this work with less than 1% error.
                relative_times = np.array(list(
                    relative_timings.values())).sum(axis=0)
                if not np.all(relative_times > 0.75):
                    raise AssertionError(
                        pretty((relative_times, time_per_iteration,
                                dict(relative_timings))))
                if not np.all(relative_times <= 1):
                    raise AssertionError(
                        pretty((relative_times, time_per_iteration,
                                dict(relative_timings))))

            elif file.name == 'checkpoints':
                checkpoints_files = tuple(file.glob('*'))
                assert len(checkpoints_files) == 5, checkpoints_files
                checkpoints_files_name = [f.name for f in checkpoints_files]
                expect = {
                    'ckpt_0.pth', 'ckpt_2.pth', 'ckpt_4.pth',
                    'ckpt_best_loss.pth', 'ckpt_latest.pth'
                }
                assert expect == set(checkpoints_files_name), (
                    expect, checkpoints_files_name)
                ckpt_ranking = torch.load(
                    str(file / 'ckpt_latest.pth'
                        ))['hooks']['BackOffValidationHook']['ckpt_ranking']
                assert ckpt_ranking[0][1] > 0, ckpt_ranking
                for i, ckpt in enumerate(ckpt_ranking):
                    ckpt_ranking[i] = (ckpt[0], -1)
                expect = [(f'ckpt_{i}.pth', -1) for i in [0, 2, 4]]
                assert ckpt_ranking == expect, (ckpt_ranking, expect)

                for symlink in [
                        file / 'ckpt_latest.pth',
                        file / 'ckpt_best_loss.pth',
                ]:
                    assert symlink.is_symlink(), symlink

                    target = os.readlink(str(symlink))
                    if '/' in target:
                        raise AssertionError(
                            f'The symlink {symlink} contains a "/".\n'
                            f'Expected that the symlink has a relative target,\n'
                            f'but the target is: {target}')
            else:
                raise ValueError(file)

        post_state_dict = copy.deepcopy(t.state_dict())
        assert pre_state_dict.keys() == post_state_dict.keys()

        equal_amount = {
            key: (pt.utils.to_numpy(parameter_pre) == pt.utils.to_numpy(
                post_state_dict['model'][key])).mean()
            for key, parameter_pre in pre_state_dict['model'].items()
        }

        # ToDo: why are so many weights unchanged? Maybe the zeros in the image?
        assert equal_amount == {'l.bias': 0.0, 'l.weight': 0.6900510204081632}

        import time
        # tfevents use unixtime as unique indicator. Sleep 2 seconds to ensure
        # new value
        time.sleep(2)

        config['stop_trigger'] = (4, 'epoch')
        t = pt.Trainer.from_config(config)
        t.register_validation_hook(validation_iterator=dt_dataset,
                                   max_checkpoints=None)
        log_list = []
        for hook in t.hooks:
            for k, v in list(hook.__dict__.items()):
                if isinstance(v, pt.train.trigger.Trigger):
                    hook.__dict__[k] = TriggerMock(v, log_list)
        t.train(train_dataset=tr_dataset, resume=True)

        hook_calls = ('\n'.join(log_list))

        hook_calls_ref = textwrap.dedent('''
        I:4, E: 2, False, SummaryHook.pre_step
        I:4, E: 2, False, BackOffValidationHook.pre_step
        I:4, E: 2, False, CheckpointHook.pre_step
        I:4, E: 2, False, StopTrainingHook.pre_step
        I:5, E: 2, False, SummaryHook.pre_step
        I:5, E: 2, False, BackOffValidationHook.pre_step
        I:5, E: 2, False, CheckpointHook.pre_step
        I:5, E: 2, False, StopTrainingHook.pre_step
        I:6, E: 3, True, SummaryHook.pre_step
        I:6, E: 3, True, BackOffValidationHook.pre_step
        I:6, E: 3, True, CheckpointHook.pre_step
        I:6, E: 3, False, StopTrainingHook.pre_step
        I:7, E: 3, False, SummaryHook.pre_step
        I:7, E: 3, False, BackOffValidationHook.pre_step
        I:7, E: 3, False, CheckpointHook.pre_step
        I:7, E: 3, False, StopTrainingHook.pre_step
        I:8, E: 4, False, SummaryHook.pre_step
        I:8, E: 4, True, BackOffValidationHook.pre_step
        I:8, E: 4, True, CheckpointHook.pre_step
        I:8, E: 4, True, StopTrainingHook.pre_step
        ''').strip()

        print('#' * 80)
        print(hook_calls)
        print('#' * 80)

        if hook_calls != hook_calls_ref:
            import difflib
            raise AssertionError('\n' + ('\n'.join(
                difflib.ndiff(
                    hook_calls_ref.splitlines(),
                    hook_calls.splitlines(),
                ))))

        files_after = tuple(tmp_dir.glob('*'))
        assert len(files_after) == 3, files_after
        for file in sorted(files_after):
            if 'tfevents' in file.name:
                if file in old_event_files:
                    continue

                events = list(load_events_as_dict(file))

                tags = []
                for event in events:
                    if 'summary' in event.keys():
                        value, = event['summary']['value']
                        tags.append(value['tag'])

                c = dict(collections.Counter(tags))
                assert len(events) == 38, (len(events), events)
                expect = {
                    'training/grad_norm': 2,
                    'training/grad_norm_': 2,
                    'training/loss': 2,
                    'training/lr/param_group_0': 2,
                    'training_timings/time_per_iteration': 2,
                    'training_timings/time_rel_to_device': 2,
                    'training_timings/time_rel_forward': 2,
                    'training_timings/time_rel_review': 2,
                    'training_timings/time_rel_backward': 2,
                    'training_timings/time_rel_optimize': 2,
                    'training_timings/time_rel_data_loading': 2,
                    # 'training_timings/time_rel_step': 2,
                    'validation/loss': 2,
                    # 'validation/lr/param_group_0': 2,
                    'validation_timings/time_per_iteration': 2,
                    'validation_timings/time_rel_to_device': 2,
                    'validation_timings/time_rel_forward': 2,
                    'validation_timings/time_rel_review': 2,
                    'validation_timings/time_rel_data_loading': 2,
                    # 'validation_timings/time_rel_step': 2,
                    # non validation time can only be measured between
                    # validations:
                    #  => # of non_val_time - 1 == # of val_time
                    'validation_timings/non_validation_time': 1,
                    'validation_timings/validation_time': 2,
                }
                if c != expect:
                    import difflib

                    raise AssertionError('\n' + ('\n'.join(
                        difflib.ndiff(
                            [
                                f'{k!r}: {v!r}'
                                for k, v in sorted(expect.items())
                            ],
                            [f'{k!r}: {v!r}' for k, v in sorted(c.items())],
                        ))))
            elif file.name == 'checkpoints':
                checkpoints_files = tuple(file.glob('*'))
                assert len(checkpoints_files) == 7, checkpoints_files
                checkpoints_files_name = [f.name for f in checkpoints_files]
                expect = {
                    *[f'ckpt_{i}.pth' for i in [0, 2, 4, 6, 8]],
                    'ckpt_best_loss.pth', 'ckpt_latest.pth'
                }
                assert expect == set(checkpoints_files_name), (
                    expect, checkpoints_files_name)
            else:
                raise ValueError(file)
            dic["dalhousie university"] = dic["dalhousie university"] + 1
            dic_count[
                "dalhousie university"] = dic_count["dalhousie university"] + 1

print("\n\nTotal Docuemnts:" + str(counter))
finalop = []
for x in dic.keys():
    finalop.append([
        x, dic_count[x], "{0}/{1}={2:.2f}".format(counter, dic_count[x],
                                                  (counter / dic_count[x])),
        "{0:.2f}".format(m.log(counter / dic_count[x], 10))
    ])

final_tab = pd.DataFrame.from_records(
    finalop, columns=["Search Query", "DF", "N/DF", "Log(N/DF)"])
pprint(final_tab)

# print(dic)
# print(dic_count)

# reference for converting list into dataframe https://thispointer.com/python-pandas-how-to-convert-lists-to-a-dataframe/
print("\n\nTerm: Canada")
col = [
    "Canada appeared in " + str(len(canada_count)) + " documents",
    "Total Words(m)", "Frequency(f)"
]
canada_df = pd.DataFrame.from_records(canada_count, columns=col)
pprint(canada_df)

print(
    "\nMaximum f/m for word Canada is noted in article #{0} and f/m value is {1:.2f}"
Example #22
def print_options_as_nested_dict(point_tuple):
    pprint(
        unmangle_tuples_to_nested_dict(tuple(zip(parameterNames,
                                                 point_tuple))))
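
unmangle_tuples_to_nested_dict is not shown in this snippet; assuming parameterNames use dotted paths, one possible (purely illustrative) implementation:

def unmangle_tuples_to_nested_dict(pairs):
    # ('model.lr', 0.01) -> {'model': {'lr': 0.01}}
    nested = {}
    for dotted_name, value in pairs:
        node = nested
        *parents, leaf = dotted_name.split('.')
        for part in parents:
            node = node.setdefault(part, {})
        node[leaf] = value
    return nested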
Example #23
            bow[word] = bow[word] + 1
    polarity = "NEUTRAL"
    if (pcount > ncount):
        polarity = "POSITIVE"
    elif (pcount < ncount):
        polarity = "NEGATIVE"
    output.append([counter, line, ' '.join(match), polarity])
    counter = counter + 1

print("\n Storing output into csv files")
df = pd.DataFrame.from_records(output,
                               columns=["ID", "Tweet", "Match", "Polarity"])
df.to_csv("data/output.csv", index=False)

print("\n Tweet analysis")
pprint(df)

final_bow = []
for x in bow:
    if x in plist:
        final_bow.append([x, bow[x], "POSITIVE"])
    elif x in nlist:
        final_bow.append([x, bow[x], "NEGATIVE"])

# reference for converting list into dataframe https://thispointer.com/python-pandas-how-to-convert-lists-to-a-dataframe/
# create dataframe based on bow
df_bow = pd.DataFrame.from_records(final_bow,
                                   columns=["Word", "Frequency", "Polarity"])
df_bow.to_csv("data/bog.csv", index=False)
print("/n Bag of Word with frequency and polarity")
pprint(df_bow)
Example #24
def test_single_model():
    tr_dataset, dt_dataset = get_dataset()
    tr_dataset = tr_dataset[:2]
    dt_dataset = dt_dataset[:2]

    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_dir = Path(tmp_dir)

        config = pt.Trainer.get_config(updates=pb.utils.nested.deflatten(
            {
                'model.factory': Model,
                'storage_dir': str(tmp_dir),
                'stop_trigger': (2, 'epoch'),
                'summary_trigger': (3, 'iteration'),
                'checkpoint_trigger': (2, 'iteration')
            }))

        t = pt.Trainer.from_config(config)
        pre_state_dict = copy.deepcopy(t.state_dict())

        files_before = tuple(tmp_dir.glob('*'))
        if len(files_before) != 0:
            # no event file
            raise Exception(files_before)

        t.register_validation_hook(validation_iterator=dt_dataset,
                                   max_checkpoints=None)

        # Wrap each trigger in each hook with TriggerMock.
        log_list = []
        for hook in t.hooks:
            for k, v in list(hook.__dict__.items()):
                if isinstance(v, pt.train.trigger.Trigger):
                    hook.__dict__[k] = TriggerMock(v, log_list)
        t.train(train_iterator=tr_dataset, resume=False)

        hook_calls = ('\n'.join(log_list))

        # CheckpointedValidationHook trigger is called two times
        #   (once for checkpointing, once for validation)

        hook_calls_ref = textwrap.dedent('''
        I:0, E: 0, True, SummaryHook.pre_step
        I:0, E: 0, True, CheckpointHook.pre_step
        I:0, E: 0, True, BackOffValidationHook.pre_step
        I:0, E: 0, False, StopTrainingHook.pre_step
        I:1, E: 0, False, SummaryHook.pre_step
        I:1, E: 0, False, CheckpointHook.pre_step
        I:1, E: 0, False, BackOffValidationHook.pre_step
        I:1, E: 0, False, StopTrainingHook.pre_step
        I:2, E: 1, False, SummaryHook.pre_step
        I:2, E: 1, True, CheckpointHook.pre_step
        I:2, E: 1, True, BackOffValidationHook.pre_step
        I:2, E: 1, False, StopTrainingHook.pre_step
        I:3, E: 1, True, SummaryHook.pre_step
        I:3, E: 1, False, CheckpointHook.pre_step
        I:3, E: 1, False, BackOffValidationHook.pre_step
        I:3, E: 1, False, StopTrainingHook.pre_step
        I:4, E: 2, False, SummaryHook.pre_step
        I:4, E: 2, True, CheckpointHook.pre_step
        I:4, E: 2, True, BackOffValidationHook.pre_step
        I:4, E: 2, True, StopTrainingHook.pre_step
        ''').strip()

        print('#' * 80)
        print(hook_calls)
        print('#' * 80)

        if hook_calls != hook_calls_ref:
            import difflib
            raise AssertionError('\n' + ('\n'.join(
                difflib.ndiff(
                    hook_calls_ref.splitlines(),
                    hook_calls.splitlines(),
                ))))

        old_event_files = []

        files_after = tuple(tmp_dir.glob('*'))
        assert len(files_after) == 2, files_after
        for file in sorted(files_after):
            if 'tfevents' in file.name:
                old_event_files.append(file)
                events = list(load_events_as_dict(file))

                tags = []
                time_rel_data_loading = []
                time_rel_train_step = []
                for event in events:
                    if 'summary' in event.keys():
                        value, = event['summary']['value']
                        tags.append(value['tag'])
                        if value[
                                'tag'] == 'training_timings/time_rel_data_loading':
                            time_rel_data_loading.append(value['simple_value'])
                        elif value['tag'] == 'training_timings/time_rel_step':
                            time_rel_train_step.append(value['simple_value'])

                c = dict(collections.Counter(tags))
                # Training summary is written two times (at iteration 3 when
                #   summary_trigger triggers and when training stops and
                #   summary_hook is closed).
                # Validation summary is written when checkpoint_trigger
                #   triggers, hence 3 times.
                #   non_validation_time can only be measured between
                #   validations => 2 values (one fewer than validation_time)
                expect = {
                    'training/grad_norm': 2,
                    'training/grad_norm_': 2,
                    'training/loss': 2,
                    'training/lr/param_group_0': 2,
                    'training_timings/time_per_iteration': 2,
                    'training_timings/time_rel_to_device': 2,
                    'training_timings/time_rel_forward': 2,
                    'training_timings/time_rel_review': 2,
                    'training_timings/time_rel_backward': 2,
                    'training_timings/time_rel_data_loading': 2,
                    'training_timings/time_rel_step': 2,
                    'validation/loss': 3,
                    'validation/lr/param_group_0': 3,
                    'validation_timings/time_per_iteration': 3,
                    'validation_timings/time_rel_to_device': 3,
                    'validation_timings/time_rel_forward': 3,
                    'validation_timings/time_rel_review': 3,
                    'validation_timings/time_rel_backward': 3,
                    'validation_timings/time_rel_data_loading': 3,
                    'validation_timings/time_rel_step': 3,
                    # non validation time can only be measured between
                    # validations:
                    #  => # of non_val_time - 1 == # of val_time
                    'validation_timings/non_validation_time': 2,
                    'validation_timings/validation_time': 3,
                }
                pprint(c)
                assert c == expect, c
                assert len(events) == 55, (len(events), events)

                assert len(time_rel_data_loading) > 0, (time_rel_data_loading,
                                                        time_rel_train_step)
                assert len(time_rel_train_step) > 0, (time_rel_data_loading,
                                                      time_rel_train_step)
                np.testing.assert_allclose(
                    np.add(time_rel_data_loading, time_rel_train_step),
                    1,
                    err_msg=f'{time_rel_data_loading}, {time_rel_train_step})')

            elif file.name == 'checkpoints':
                checkpoints_files = tuple(file.glob('*'))
                assert len(checkpoints_files) == 6, checkpoints_files
                checkpoints_files_name = [f.name for f in checkpoints_files]
                expect = {
                    'ckpt_0.pth', 'ckpt_2.pth', 'ckpt_4.pth',
                    'validation_state.json', 'ckpt_best_loss.pth',
                    'ckpt_latest.pth'
                }
                assert expect == set(checkpoints_files_name), (
                    expect, checkpoints_files_name)
                ckpt_ranking = pb.io.load_json(
                    file / 'validation_state.json')['ckpt_ranking']
                assert ckpt_ranking[0][1] > 0, ckpt_ranking
                for ckpt in ckpt_ranking:
                    ckpt[1] = -1
                expect = [[f'ckpt_{i}.pth', -1] for i in [0, 2, 4]]
                assert ckpt_ranking == expect, (ckpt_ranking, expect)

                for symlink in [
                        file / 'ckpt_latest.pth',
                        file / 'ckpt_best_loss.pth',
                ]:
                    assert symlink.is_symlink(), symlink

                    target = os.readlink(str(symlink))
                    if '/' in target:
                        raise AssertionError(
                            f'The symlink {symlink} contains a "/".\n'
                            f'Expected that the symlink has a relative target,\n'
                            f'but the target is: {target}')
            else:
                raise ValueError(file)

        post_state_dict = copy.deepcopy(t.state_dict())
        assert pre_state_dict.keys() == post_state_dict.keys()

        equal_amount = {
            key: (pt.utils.to_numpy(parameter_pre) == pt.utils.to_numpy(
                post_state_dict['model'][key])).mean()
            for key, parameter_pre in pre_state_dict['model'].items()
        }

        # ToDo: why are so many weights unchanged? Maybe the zeros in the image?
        assert equal_amount == {'l.bias': 0.0, 'l.weight': 0.6900510204081632}

        import time
        # tfevents use unixtime as unique indicator. Sleep 2 seconds to ensure
        # new value
        time.sleep(2)

        config['stop_trigger'] = (4, 'epoch')
        t = pt.Trainer.from_config(config)
        t.register_validation_hook(validation_iterator=dt_dataset,
                                   max_checkpoints=None)
        log_list = []
        for hook in t.hooks:
            for k, v in list(hook.__dict__.items()):
                if isinstance(v, pt.train.trigger.Trigger):
                    hook.__dict__[k] = TriggerMock(v, log_list)
        t.train(train_iterator=tr_dataset, resume=True)

        hook_calls = ('\n'.join(log_list))

        hook_calls_ref = textwrap.dedent('''
        I:4, E: 2, False, SummaryHook.pre_step
        I:4, E: 2, False, CheckpointHook.pre_step
        I:4, E: 2, False, BackOffValidationHook.pre_step
        I:4, E: 2, False, StopTrainingHook.pre_step
        I:5, E: 2, False, SummaryHook.pre_step
        I:5, E: 2, False, CheckpointHook.pre_step
        I:5, E: 2, False, BackOffValidationHook.pre_step
        I:5, E: 2, False, StopTrainingHook.pre_step
        I:6, E: 3, True, SummaryHook.pre_step
        I:6, E: 3, True, CheckpointHook.pre_step
        I:6, E: 3, True, BackOffValidationHook.pre_step
        I:6, E: 3, False, StopTrainingHook.pre_step
        I:7, E: 3, False, SummaryHook.pre_step
        I:7, E: 3, False, CheckpointHook.pre_step
        I:7, E: 3, False, BackOffValidationHook.pre_step
        I:7, E: 3, False, StopTrainingHook.pre_step
        I:8, E: 4, False, SummaryHook.pre_step
        I:8, E: 4, True, CheckpointHook.pre_step
        I:8, E: 4, True, BackOffValidationHook.pre_step
        I:8, E: 4, True, StopTrainingHook.pre_step
        ''').strip()

        print('#' * 80)
        print(hook_calls)
        print('#' * 80)

        if hook_calls != hook_calls_ref:
            import difflib
            raise AssertionError('\n' + ('\n'.join(
                difflib.ndiff(
                    hook_calls_ref.splitlines(),
                    hook_calls.splitlines(),
                ))))

        files_after = tuple(tmp_dir.glob('*'))
        assert len(files_after) == 3, files_after
        for file in sorted(files_after):
            if 'tfevents' in file.name:
                if file in old_event_files:
                    continue

                events = list(load_events_as_dict(file))

                tags = []
                for event in events:
                    if 'summary' in event.keys():
                        value, = event['summary']['value']
                        tags.append(value['tag'])

                c = dict(collections.Counter(tags))
                assert len(events) == 44, (len(events), events)
                expect = {
                    'training/grad_norm': 2,
                    'training/grad_norm_': 2,
                    'training/loss': 2,
                    'training/lr/param_group_0': 2,
                    'training_timings/time_per_iteration': 2,
                    'training_timings/time_rel_to_device': 2,
                    'training_timings/time_rel_forward': 2,
                    'training_timings/time_rel_review': 2,
                    'training_timings/time_rel_backward': 2,
                    'training_timings/time_rel_data_loading': 2,
                    'training_timings/time_rel_step': 2,
                    'validation/loss': 2,
                    'validation/lr/param_group_0': 2,
                    'validation_timings/time_per_iteration': 2,
                    'validation_timings/time_rel_to_device': 2,
                    'validation_timings/time_rel_forward': 2,
                    'validation_timings/time_rel_review': 2,
                    'validation_timings/time_rel_backward': 2,
                    'validation_timings/time_rel_data_loading': 2,
                    'validation_timings/time_rel_step': 2,
                    # non validation time can only be measured between
                    # validations:
                    #  => # of non_val_time - 1 == # of val_time
                    'validation_timings/non_validation_time': 1,
                    'validation_timings/validation_time': 2,
                }
                assert c == expect, c
            elif file.name == 'checkpoints':
                checkpoints_files = tuple(file.glob('*'))
                assert len(checkpoints_files) == 8, checkpoints_files
                checkpoints_files_name = [f.name for f in checkpoints_files]
                expect = {
                    *[f'ckpt_{i}.pth' for i in [0, 2, 4, 6, 8]],
                    'validation_state.json', 'ckpt_best_loss.pth',
                    'ckpt_latest.pth'
                }
                assert expect == set(checkpoints_files_name), (
                    expect, checkpoints_files_name)
            else:
                raise ValueError(file)
Example #25
 def print_tree(self):
     try:
         from IPython.lib.pretty import pprint
     except ImportError:
         from pprint import pprint
     pprint(self)
Example #26
 def pp_obj_size(self, obj):
     from IPython.lib.pretty import pprint 
     pprint(obj)
     print(f'\nObject size is {sys.getsizeof(obj)} bytes')
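
A possible call site, assuming the method above lives on some helper instance (the list payload is arbitrary, and the reported byte count depends on the Python version and platform):

# hypothetical usage
helper.pp_obj_size(list(range(10)))
# [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
#
# Object size is 136 bytes   (exact value varies by interpreter)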