def getJson(*parts, method='get', postData=None):
    '''Submit a request and return the response for testing'''
    # construct the URL
    path = root + makeURL(*parts)
    # start the request object
    request = urllib.request.Request(path, method=method)
    # if we have data to post encode it properly
    if postData:
        # add the content header to signal json is coming
        request.add_header('Content-Type', 'application/json; charset=utf-8')
        # encode as json and then as utf-8 bytes
        postData = json.dumps(postData).encode('utf-8')
        # tell it the length of the data
        request.add_header('Content-Length', len(postData))
    result = None
    try:
        with urllib.request.urlopen(request, postData) as fp:
            code = fp.getcode()
            if code == 200:
                data = fp.read().decode('utf-8')
                result = json.loads(data)
    except urllib.error.HTTPError as e:
        code = e.code
    print(f'http response = {code}')
    if result:
        pprint(result, max_seq_length=8)
    return result or code
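# A minimal usage sketch for the getJson helper above, assuming `root`,
# `makeURL`, and the imports (urllib.request, json, IPython's pprint) are
# defined at module level as in the snippet; the 'users' endpoint and the
# posted payload are hypothetical placeholders, not part of the original.
created = getJson('users', method='post', postData={'name': 'alice'})
listing = getJson('users')  # plain GET; returns parsed JSON on 200, else the HTTP status code
assert listing != 404, 'endpoint should exist'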
def _ipython_display_(self, **kwargs):
    widget = self._widget()
    if widget is not None:
        return widget._ipython_display_(**kwargs)
    from IPython.lib.pretty import pprint
    pprint(self)
def main(_config, _run: sacred.run.Run):
    """ """
    sacred.commands.print_config(_run)
    model = Model.from_config(_config['model'])
    print('Model config')
    pprint(model.config)
def pprint(*args, **kwargs):
    """
    Pretty-print a Python object using ``IPython.lib.pretty.pprint``.
    Fall back to ``pprint.pprint`` if IPython is not available.
    """
    try:
        from IPython.lib.pretty import pprint
    except ImportError:
        from pprint import pprint
    pprint(*args, **kwargs)
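# Short usage sketch for the fallback wrapper above: the call looks the same
# whether IPython is installed (IPython.lib.pretty.pprint) or not
# (pprint.pprint). The printed dict is arbitrary example data.
pprint({'layers': [64, 64, 10], 'activation': 'relu', 'dropout': 0.1})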
def test_ipython_pprint():
    from io import StringIO
    eqs = Equations("""dv/dt = -(v + I)/ tau : volt (unless refractory)
                       I = sin(2 * 22/7. * f * t)* volt : volt
                       f : Hz""")
    # Test ipython's pretty printing
    old_stdout = sys.stdout
    string_output = StringIO()
    sys.stdout = string_output
    pprint(eqs)
    assert len(string_output.getvalue()) > 0
    sys.stdout = old_stdout
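# The manual sys.stdout swap in the tests above can also be written with
# contextlib.redirect_stdout, which restores stdout even if pprint raises.
# A minimal sketch, with a plain dict standing in for the Equations object:
import contextlib
from io import StringIO

string_output = StringIO()
with contextlib.redirect_stdout(string_output):
    pprint({'dv/dt': '-(v + I)/tau : volt'})
assert len(string_output.getvalue()) > 0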
def test_ipython_pprint():
    if pprint is None:
        raise SkipTest('ipython is not available')
    eqs = Equations('''dv/dt = -(v + I)/ tau : volt (unless refractory)
                       I = sin(2 * 22/7. * f * t)* volt : volt
                       f : Hz''')
    # Test ipython's pretty printing
    old_stdout = sys.stdout
    string_output = StringIO()
    sys.stdout = string_output
    pprint(eqs)
    assert len(string_output.getvalue()) > 0
    sys.stdout = old_stdout
def main(_config, _run: sacred.run.Run):
    """
    python parametized.py print_config
    python parametized.py print_config with model.kwargs.encoder.cls=RecurrentEncoder model.kwargs.vae_param=10
    """
    from IPython.lib.pretty import pprint
    sacred.commands.print_config(_run)
    model = VAE.from_config(_config['model'])
    print('Model config')
    pprint(model.config)
    print('Encoder config')
    pprint(model.encoder)
def main(argv=None):
    """
    psd-tools command line utility.

    Usage:
        psd-tools export <input_file> <output_file> [options]
        psd-tools show <input_file> [options]
        psd-tools debug <input_file> [options]
        psd-tools -h | --help
        psd-tools --version

    Options:
        -v --verbose    Be more verbose.

    Example:
        psd-tools show example.psd  # Show the file content
        psd-tools export example.psd example.png  # Export as PNG
        psd-tools export example.psd[0] example-0.png  # Export layer as PNG
    """
    args = docopt.docopt(main.__doc__, version=__version__, argv=argv)

    if args['--verbose']:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    if args['export']:
        input_parts = args['<input_file>'].split('[')
        input_file = input_parts[0]
        if len(input_parts) > 1:
            indices = [int(x.rstrip(']')) for x in input_parts[1:]]
        else:
            indices = []
        layer = PSDImage.open(input_file)
        for index in indices:
            layer = layer[index]
        if isinstance(layer, PSDImage) and layer.has_preview():
            image = layer.topil()
        else:
            image = layer.compose()
        image.save(args['<output_file>'])
    elif args['show']:
        psd = PSDImage.open(args['<input_file>'])
        pprint(psd)
    elif args['debug']:
        psd = PSDImage.open(args['<input_file>'])
        pprint(psd._record)
def test_ipython_pprint():
    try:
        from cStringIO import StringIO  # Python 2
    except ImportError:
        from io import StringIO  # Python 3
    eqs = Equations('''dv/dt = -(v + I)/ tau : volt (unless refractory)
                       I = sin(2 * 22/7. * f * t)* volt : volt
                       f : Hz''')
    # Test ipython's pretty printing
    old_stdout = sys.stdout
    string_output = StringIO()
    sys.stdout = string_output
    pprint(eqs)
    assert len(string_output.getvalue()) > 0
    sys.stdout = old_stdout
def search_bar(request):
    searchText = request.GET['text']
    response_data = dict()
    response_data['number_of_results'] = 0
    response_data['results'] = []
    if searchText.strip() == "":
        print("NO STRING")
        return HttpResponse(json.dumps(response_data),
                            content_type="application/json")

    members = Member.objects.filter(name__contains=searchText, is_current=True)
    for member in members:
        newResult = dict()
        newResult['id'] = member.id
        newResult['name'] = member.name
        newResult['party'] = member.current_party.name
        newResult['type'] = "member"
        response_data['results'].append(newResult)
        response_data['number_of_results'] += 1

    tags = Tag.objects.filter(name__contains=searchText)
    for tag in tags:
        newResult = dict()
        newResult['id'] = tag.id
        newResult['name'] = tag.name
        newResult['type'] = "tag"
        response_data['results'].append(newResult)
        response_data['number_of_results'] += 1

    parties = Party.objects.filter(name__contains=searchText,
                                   knesset__number=CURRENT_KNESSET_NUMBER)
    for party in parties:
        newResult = dict()
        newResult['id'] = party.id
        newResult['name'] = party.name
        newResult['type'] = "party"
        response_data['results'].append(newResult)
        response_data['number_of_results'] += 1

    print('number of results:', response_data['number_of_results'])
    pprint(response_data)
    return HttpResponse(json.dumps(response_data), content_type="application/json")
def test_str_repr():
    '''
    Test the string representation (only that it does not throw errors).
    '''
    tau = 10 * ms
    eqs = Equations('''dv/dt = -(v + I)/ tau : volt (unless-refractory)
                       I = sin(2 * 22/7. * f * t)* volt : volt
                       f : Hz''')
    assert len(str(eqs)) > 0
    assert len(repr(eqs)) > 0

    # Test str and repr of SingleEquations explicitly (might already have been
    # called by Equations
    for eq in eqs.itervalues():
        assert (len(str(eq))) > 0
        assert (len(repr(eq))) > 0

    # Test ipython's pretty printing
    old_stdout = sys.stdout
    string_output = StringIO()
    sys.stdout = string_output
    pprint(eqs)
    assert len(string_output.getvalue()) > 0
    sys.stdout = old_stdout
def test_str_repr():
    '''
    Test the string representation (only that it does not throw errors).
    '''
    tau = 10 * ms
    eqs = Equations('''dv/dt = -(v + I)/ tau : volt (active)
                       I = sin(2 * 22/7. * f * t)* volt : volt
                       f : Hz''')
    assert len(str(eqs)) > 0
    assert len(repr(eqs)) > 0

    # Test str and repr of SingleEquations explicitly (might already have been
    # called by Equations
    for eq in eqs.equations.itervalues():
        assert(len(str(eq))) > 0
        assert(len(repr(eq))) > 0

    # Test ipython's pretty printing
    old_stdout = sys.stdout
    string_output = StringIO()
    sys.stdout = string_output
    pprint(eqs)
    assert len(string_output.getvalue()) > 0
    sys.stdout = old_stdout
def pretty_print_options(options):
    l = nested_dict_to_list_of_tuples(options)
    pprint(l)
def scrape():
    cwd = os.getcwd()
    ApiKey = 'eMpdHL6BBBPdurOf5a9PbX276bW4lcSgcn1Nltgo'
    #url = 'https://api.nasa.gov/insight_weather/?api_key=eMpdHL6BBBPdurOf5a9PbX276bW4lcSgcn1Nltgo&feedtype=json&ver=1.0'
    #url = 'https://api.nasa.gov/insight_weather/?api_key=eMpdHL6BBBPdurOf5a9PbX276bW4lcSgcn1Nltgo&feedtype=json&ver=1.0'
    #url = 'https://api.nasa.gov/DONKI/notifications?startDate=2014-05-01&endDate=2014-05-08&type=all&api_key=eMpdHL6BBBPdurOf5a9PbX276bW4lcSgcn1Nltgo'
    #url = 'https://api.nasa.gov/DONKI/notifications?startDate=2014-05-01&endDate=2014-05-08&type=all&api_key=eMpdHL6BBBPdurOf5a9PbX276bW4lcSgcn1Nltgo'
    url = 'https://exoplanetarchive.ipac.caltech.edu/cgi-bin/nstedAPI/nph-nstedAPI?&table=exoplanets&format=json&where=pl_kepflag=1'
    #url = 'https://api.nasa.gov/mars-photos/api/v1/rovers/curiosity/photos?sol=1000&page=2&api_key=eMpdHL6BBBPdurOf5a9PbX276bW4lcSgcn1Nltgo'
    response = requests.get(url).json()
    pprint("INSIDE ScRAPE")

    # open a file for writing
    outputfile = open(f'{cwd}\data\Data1.csv', 'w', newline='')
    pprint("INSIDE ScRAPE0")

    # create the csv writer object
    csvwriter = csv.writer(outputfile)
    count = 0
    for resp in response:
        if count == 0:
            header = resp.keys()
            csvwriter.writerow(header)
            count += 1
        csvwriter.writerow(resp.values())
    outputfile.close()
    pprint("INSIDE ScRAPE1")

    engine = create_engine('postgresql://*****:*****@localhost/NASA')
    connection = engine.connect()
    pprint("INSIDE ScRAPE2")

    conn = psycopg2.connect(
        "host='localhost' port='5432' dbname='NASA' user='******' password='******'"
    )
    pprint("INSIDE ScRAPE20")
    cur = conn.cursor()
    pprint("INSIDE ScRAPE201")
    cur.execute("truncate table kepler_system")
    pprint("INSIDE ScRAPE21")

    #f = open(r'C:\\bootcamp\VisualProject\Data1.csv', 'r')
    f = open(f'{cwd}\data\Data1.csv', 'r')
    cur.copy_from(f, "kepler_system", sep=',')
    f.close()
    pprint("INSIDE ScRAPE22")
    conn.commit()
    conn.close()
    # cur.execute("""Copy kepler_system from 'C:\\bootcamp\VisualProject\Data1.csv';""")
    # conn.commit()
    # conn.close()
    #cur1 = conn.cursor()
    pprint("INSIDE ScRAPE3")

    conn = psycopg2.connect(
        "host='localhost' port='5432' dbname='NASA' user='******' password='******'"
    )
    cur = conn.cursor()
    #sql = "copy (SELECT * FROM kepler_system ) TO 'C:\\bootcamp\VisualProject\DataForD3.csv' CSV HEADER WITH CSV DELIMITER ',';"
    #sql = f"copy (SELECT * FROM kepler_small) TO '{cwd}\static\DataForD3.csv' CSV;"
    sql = f"copy (SELECT * FROM kepler_system ) TO '{cwd}\static\DataForD3.csv' CSV;"
    pprint("INSIDE ScRAPE4")
    cur.execute(sql)
    cur.close()
    # f = open(r'C:\\bootcamp\VisualProject\DataForD3.csv', 'w')
    # cur.copy_from(f, "kepler_system", sep=',')
    # f.close()
    return "Finished"
def pretty_print(point_tuple):
    """
    Pretty print the tuple representation of the options.
    """
    pprint(tuple(zip(parameterNames, point_tuple)))
def test_repr_pretty(fixture):
    fixture.__repr__()
    pprint(fixture)
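# For pprint(fixture) above to show more than the default repr, the fixture's
# class would typically implement IPython's _repr_pretty_ protocol. A minimal
# sketch; the class and field names are illustrative, not from the original.
class Box:
    def __init__(self, items):
        self.items = items

    def _repr_pretty_(self, p, cycle):
        # p is the pretty printer, cycle is True for recursive structures.
        if cycle:
            p.text('Box(...)')
        else:
            with p.group(4, 'Box(', ')'):
                p.pretty(self.items)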
    '''path : pathSegments'''
    p[0] = IdNode(p[1])


def p_path_segments(p):
    '''pathSegments : pathSegments SEP ID
                    | ID
    '''
    if len(p) == 4:
        seg = p[1]
        seg.push(p[3])
    else:
        seg = [p[1]]
    p[0] = seg


def compile(template):
    lex.lex()
    # lex.lexer.push_state('mu')
    lex.input(template)
    while 1:
        tok = lex.token()
        if not tok:
            break
        print(tok)
    yacc.yacc()
    return yacc.parse(template)


if __name__ == '__main__':
    # print compile("test")
    x = compile("{{#if nothing}}Scratchpad{{else}}{{# complicated}}{{> partial}}{{/complicated}}{{/if}}")
    # print "%s\n%r" % (x, x)
    pretty.pprint(x, max_width=60)
        [
            'python', json2text,
            '--mictype=worn',
            file,
        ],
        stderr=None,
    ).stdout
    for line in kaldi_transcriptions.strip().split('\n'):
        example_id, transcription = map(str.strip, (line + ' ').split(' ', maxsplit=1))
        example_id_split = example_id.split('_')
        speaker_id, session_id, remaining = example_id_split
        location, start, end = remaining.split('-')
        example_id = f'{speaker_id}_{session_id}_{start}-{end}'
        assert example_id not in all_kaldi_transcriptions, (
            example_id, all_kaldi_transcriptions)
        all_kaldi_transcriptions[example_id] = transcription
    return all_kaldi_transcriptions


if __name__ == '__main__':
    pprint(list(get_kaldi_transcriptions().items())[:5])
    pprint(list(get_kaldi_transcriptions(None).items())[:5])
def test_single_model():
    if sys.platform.startswith('win'):
        pytest.skip(
            'this doctest does not work on Windows, '
            'training is not possible on Windows due to symlinks being unavailable'
        )

    tr_dataset, dt_dataset = get_dataset()
    tr_dataset = tr_dataset[:2]
    dt_dataset = dt_dataset[:2]

    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_dir = Path(tmp_dir)
        config = pt.Trainer.get_config(updates=pb.utils.nested.deflatten({
            'model.factory': Model,
            'storage_dir': str(tmp_dir),
            'stop_trigger': (2, 'epoch'),
            'summary_trigger': (3, 'iteration'),
            'checkpoint_trigger': (2, 'iteration')
        }))
        t = pt.Trainer.from_config(config)
        pre_state_dict = copy.deepcopy(t.state_dict())

        files_before = tuple(tmp_dir.glob('*'))
        if len(files_before) != 0:
            # no event file
            raise Exception(files_before)

        t.register_validation_hook(validation_iterator=dt_dataset,
                                   max_checkpoints=None)

        # Wrap each trigger in each hook with TriggerMock.
        log_list = []
        for hook in t.hooks:
            for k, v in list(hook.__dict__.items()):
                if isinstance(v, pt.train.trigger.Trigger):
                    hook.__dict__[k] = TriggerMock(v, log_list)

        t.train(train_dataset=tr_dataset, resume=False)

        hook_calls = ('\n'.join(log_list))

        # CheckpointedValidationHook trigger is called two times
        # (once for checkpointing once for validation)_file_name
        hook_calls_ref = textwrap.dedent('''
        I:0, E: 0, True, SummaryHook.pre_step
        I:0, E: 0, True, BackOffValidationHook.pre_step
        I:0, E: 0, True, CheckpointHook.pre_step
        I:0, E: 0, False, StopTrainingHook.pre_step
        I:1, E: 0, False, SummaryHook.pre_step
        I:1, E: 0, False, BackOffValidationHook.pre_step
        I:1, E: 0, False, CheckpointHook.pre_step
        I:1, E: 0, False, StopTrainingHook.pre_step
        I:2, E: 1, False, SummaryHook.pre_step
        I:2, E: 1, True, BackOffValidationHook.pre_step
        I:2, E: 1, True, CheckpointHook.pre_step
        I:2, E: 1, False, StopTrainingHook.pre_step
        I:3, E: 1, True, SummaryHook.pre_step
        I:3, E: 1, False, BackOffValidationHook.pre_step
        I:3, E: 1, False, CheckpointHook.pre_step
        I:3, E: 1, False, StopTrainingHook.pre_step
        I:4, E: 2, False, SummaryHook.pre_step
        I:4, E: 2, True, BackOffValidationHook.pre_step
        I:4, E: 2, True, CheckpointHook.pre_step
        I:4, E: 2, True, StopTrainingHook.pre_step
        ''').strip()

        print('#' * 80)
        print(hook_calls)
        print('#' * 80)

        if hook_calls != hook_calls_ref:
            import difflib
            raise AssertionError('\n' + ('\n'.join(difflib.ndiff(
                hook_calls_ref.splitlines(),
                hook_calls.splitlines(),
            ))))

        old_event_files = []

        files_after = tuple(tmp_dir.glob('*'))
        assert len(files_after) == 2, files_after
        for file in sorted(files_after):
            if 'tfevents' in file.name:
                old_event_files.append(file)
                events = list(load_events_as_dict(file))

                tags = []
                # time_rel_data_loading = []
                # time_rel_train_step = []
                time_per_iteration = []
                relative_timings = collections.defaultdict(list)
                relative_timing_keys = {
                    'training_timings/time_rel_data_loading',
                    'training_timings/time_rel_to_device',
                    'training_timings/time_rel_forward',
                    'training_timings/time_rel_review',
                    'training_timings/time_rel_backward',
                    'training_timings/time_rel_optimize',
                }
                for event in events:
                    if 'summary' in event.keys():
                        value, = event['summary']['value']
                        tags.append(value['tag'])
                        if value['tag'] in relative_timing_keys:
                            relative_timings[value['tag']].append(
                                value['simple_value'])
                        elif value['tag'] == 'training_timings/time_per_iteration':
                            time_per_iteration.append(value['simple_value'])

                c = dict(collections.Counter(tags))

                # Training summary is written two times (at iteration 3 when
                # summary_trigger triggers and when training stops and
                # summary_hook is closed).
                # Validation summary is written when checkpoint_trigger
                # triggers, hence 3 times.
                # non_validation_time can only be measured between
                # validations => 2 values (one fewer than validation_time)
                expect = {
                    'training/grad_norm': 2,
                    'training/grad_norm_': 2,
                    'training/loss': 2,
                    'training/lr/param_group_0': 2,
                    'training_timings/time_per_iteration': 2,
                    'training_timings/time_rel_to_device': 2,
                    'training_timings/time_rel_forward': 2,
                    'training_timings/time_rel_review': 2,
                    'training_timings/time_rel_backward': 2,
                    'training_timings/time_rel_optimize': 2,
                    'training_timings/time_rel_data_loading': 2,
                    # 'training_timings/time_rel_step': 2,
                    'validation/loss': 3,
                    'validation_timings/time_per_iteration': 3,
                    'validation_timings/time_rel_to_device': 3,
                    'validation_timings/time_rel_forward': 3,
                    'validation_timings/time_rel_review': 3,
                    'validation_timings/time_rel_data_loading': 3,
                    # 'validation_timings/time_rel_step': 3,
                    # non validation time can only be measured between
                    # validations:
                    # => # of non_val_time - 1 == # of val_time
                    'validation_timings/non_validation_time': 2,
                    'validation_timings/validation_time': 3,
                }
                pprint(c)
                if c != expect:
                    import difflib
                    raise AssertionError('\n' + ('\n'.join(difflib.ndiff(
                        [f'{k!r}: {v!r}' for k, v in sorted(expect.items())],
                        [f'{k!r}: {v!r}' for k, v in sorted(c.items())],
                    ))))
                assert len(events) == 46, (len(events), events)

                assert relative_timing_keys == set(relative_timings.keys()), (
                    relative_timing_keys, relative_timings)

                for k, v in relative_timings.items():
                    assert len(v) > 0, (k, v, relative_timings)

                # The relative timings should sum up to one,
                # but this model is really cheap.
                # e.g. 0.00108 and 0.000604 per iteration.
                # This may cause the mismatch.
                # Allow a calculation error of 25%.
                # ToDo: Get this work with less than 1% error.
                relative_times = np.array(list(relative_timings.values())).sum(axis=0)
                if not np.all(relative_times > 0.75):
                    raise AssertionError(pretty(
                        (relative_times, time_per_iteration, dict(relative_timings))))
                if not np.all(relative_times <= 1):
                    raise AssertionError(pretty(
                        (relative_times, time_per_iteration, dict(relative_timings))))

            elif file.name == 'checkpoints':
                checkpoints_files = tuple(file.glob('*'))
                assert len(checkpoints_files) == 5, checkpoints_files
                checkpoints_files_name = [f.name for f in checkpoints_files]
                expect = {
                    'ckpt_0.pth', 'ckpt_2.pth', 'ckpt_4.pth',
                    'ckpt_best_loss.pth', 'ckpt_latest.pth'
                }
                assert expect == set(checkpoints_files_name), (
                    expect, checkpoints_files_name)
                ckpt_ranking = torch.load(str(
                    file / 'ckpt_latest.pth'
                ))['hooks']['BackOffValidationHook']['ckpt_ranking']
                assert ckpt_ranking[0][1] > 0, ckpt_ranking
                for i, ckpt in enumerate(ckpt_ranking):
                    ckpt_ranking[i] = (ckpt[0], -1)
                expect = [(f'ckpt_{i}.pth', -1) for i in [0, 2, 4]]
                assert ckpt_ranking == expect, (ckpt_ranking, expect)

                for symlink in [
                        file / 'ckpt_latest.pth',
                        file / 'ckpt_best_loss.pth',
                ]:
                    assert symlink.is_symlink(), symlink
                    target = os.readlink(str(symlink))
                    if '/' in target:
                        raise AssertionError(
                            f'The symlink {symlink} contains a "/".\n'
                            f'Expected that the symlink has a ralative target,\n'
                            f'but the target is: {target}')
            else:
                raise ValueError(file)

        post_state_dict = copy.deepcopy(t.state_dict())
        assert pre_state_dict.keys() == post_state_dict.keys()

        equal_amount = {
            key: (pt.utils.to_numpy(parameter_pre) == pt.utils.to_numpy(
                post_state_dict['model'][key])).mean()
            for key, parameter_pre in pre_state_dict['model'].items()
        }

        # ToDo: why are so many weights unchanged? Maybe the zeros in the image?
        assert equal_amount == {'l.bias': 0.0, 'l.weight': 0.6900510204081632}

        import time
        # tfevents use unixtime as unique indicator. Sleep 2 seconds to ensure
        # new value
        time.sleep(2)

        config['stop_trigger'] = (4, 'epoch')
        t = pt.Trainer.from_config(config)
        t.register_validation_hook(validation_iterator=dt_dataset,
                                   max_checkpoints=None)

        log_list = []
        for hook in t.hooks:
            for k, v in list(hook.__dict__.items()):
                if isinstance(v, pt.train.trigger.Trigger):
                    hook.__dict__[k] = TriggerMock(v, log_list)

        t.train(train_dataset=tr_dataset, resume=True)

        hook_calls = ('\n'.join(log_list))
        hook_calls_ref = textwrap.dedent('''
        I:4, E: 2, False, SummaryHook.pre_step
        I:4, E: 2, False, BackOffValidationHook.pre_step
        I:4, E: 2, False, CheckpointHook.pre_step
        I:4, E: 2, False, StopTrainingHook.pre_step
        I:5, E: 2, False, SummaryHook.pre_step
        I:5, E: 2, False, BackOffValidationHook.pre_step
        I:5, E: 2, False, CheckpointHook.pre_step
        I:5, E: 2, False, StopTrainingHook.pre_step
        I:6, E: 3, True, SummaryHook.pre_step
        I:6, E: 3, True, BackOffValidationHook.pre_step
        I:6, E: 3, True, CheckpointHook.pre_step
        I:6, E: 3, False, StopTrainingHook.pre_step
        I:7, E: 3, False, SummaryHook.pre_step
        I:7, E: 3, False, BackOffValidationHook.pre_step
        I:7, E: 3, False, CheckpointHook.pre_step
        I:7, E: 3, False, StopTrainingHook.pre_step
        I:8, E: 4, False, SummaryHook.pre_step
        I:8, E: 4, True, BackOffValidationHook.pre_step
        I:8, E: 4, True, CheckpointHook.pre_step
        I:8, E: 4, True, StopTrainingHook.pre_step
        ''').strip()

        print('#' * 80)
        print(hook_calls)
        print('#' * 80)

        if hook_calls != hook_calls_ref:
            import difflib
            raise AssertionError('\n' + ('\n'.join(difflib.ndiff(
                hook_calls_ref.splitlines(),
                hook_calls.splitlines(),
            ))))

        files_after = tuple(tmp_dir.glob('*'))
        assert len(files_after) == 3, files_after
        for file in sorted(files_after):
            if 'tfevents' in file.name:
                if file in old_event_files:
                    continue

                events = list(load_events_as_dict(file))

                tags = []
                for event in events:
                    if 'summary' in event.keys():
                        value, = event['summary']['value']
                        tags.append(value['tag'])
                c = dict(collections.Counter(tags))
                assert len(events) == 38, (len(events), events)
                expect = {
                    'training/grad_norm': 2,
                    'training/grad_norm_': 2,
                    'training/loss': 2,
                    'training/lr/param_group_0': 2,
                    'training_timings/time_per_iteration': 2,
                    'training_timings/time_rel_to_device': 2,
                    'training_timings/time_rel_forward': 2,
                    'training_timings/time_rel_review': 2,
                    'training_timings/time_rel_backward': 2,
                    'training_timings/time_rel_optimize': 2,
                    'training_timings/time_rel_data_loading': 2,
                    # 'training_timings/time_rel_step': 2,
                    'validation/loss': 2,
                    # 'validation/lr/param_group_0': 2,
                    'validation_timings/time_per_iteration': 2,
                    'validation_timings/time_rel_to_device': 2,
                    'validation_timings/time_rel_forward': 2,
                    'validation_timings/time_rel_review': 2,
                    'validation_timings/time_rel_data_loading': 2,
                    # 'validation_timings/time_rel_step': 2,
                    # non validation time can only be measured between
                    # validations:
                    # => # of non_val_time - 1 == # of val_time
                    'validation_timings/non_validation_time': 1,
                    'validation_timings/validation_time': 2,
                }
                if c != expect:
                    import difflib
                    raise AssertionError('\n' + ('\n'.join(difflib.ndiff(
                        [f'{k!r}: {v!r}' for k, v in sorted(expect.items())],
                        [f'{k!r}: {v!r}' for k, v in sorted(c.items())],
                    ))))
            elif file.name == 'checkpoints':
                checkpoints_files = tuple(file.glob('*'))
                assert len(checkpoints_files) == 7, checkpoints_files
                checkpoints_files_name = [f.name for f in checkpoints_files]
                expect = {
                    *[f'ckpt_{i}.pth' for i in [0, 2, 4, 6, 8]],
                    'ckpt_best_loss.pth',
                    'ckpt_latest.pth'
                }
                assert expect == set(checkpoints_files_name), (
                    expect, checkpoints_files_name)
            else:
                raise ValueError(file)
dic["dalhousie university"] = dic["dalhousie university"] + 1 dic_count[ "dalhousie university"] = dic_count["dalhousie university"] + 1 print("\n\nTotal Docuemnts:" + str(counter)) finalop = [] for x in dic.keys(): finalop.append([ x, dic_count[x], "{0}/{1}={2:.2f}".format(counter, dic_count[x], (counter / dic_count[x])), "{0:.2f}".format(m.log(counter / dic_count[x], 10)) ]) final_tab = pd.DataFrame.from_records( finalop, columns=["Search Query", "DF", "N/DF", "Log(N/DF)"]) pprint(final_tab) # print(dic) # print(dic_count) # reference for converting list into dataframe https://thispointer.com/python-pandas-how-to-convert-lists-to-a-dataframe/ print("\n\nTerm: Canada") col = [ "Canada appeared in " + str(len(canada_count)) + " documents", "Total Words(m)", "Frequency(f)" ] canada_df = pd.DataFrame.from_records(canada_count, columns=col) pprint(canada_df) print( "\nMaximum f/m for word Canada is noted in article #{0} and f/m value is {1:.2f}"
def print_options_as_nested_dict(point_tuple):
    pprint(
        unmangle_tuples_to_nested_dict(tuple(zip(parameterNames, point_tuple))))
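# unmangle_tuples_to_nested_dict is not shown in the snippet above. One
# plausible reading, assuming parameter names are dotted paths such as
# 'model.lr', is sketched here as a hypothetical helper:
def unmangle_tuples_to_nested_dict(pairs):
    # Turns (('model.lr', 0.1), ('model.depth', 3), ('seed', 7)) into
    # {'model': {'lr': 0.1, 'depth': 3}, 'seed': 7}.
    nested = {}
    for name, value in pairs:
        node = nested
        *parents, leaf = name.split('.')
        for part in parents:
            node = node.setdefault(part, {})
        node[leaf] = value
    return nested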
        bow[word] = bow[word] + 1

    polarity = "NEUTRAL"
    if (pcount > ncount):
        polarity = "POSITIVE"
    elif (pcount < ncount):
        polarity = "NEGATIVE"
    output.append([counter, line, ' '.join(match), polarity])
    counter = counter + 1

print("\n Storing output into csv files")
df = pd.DataFrame.from_records(output,
                               columns=["ID", "Tweet", "Match", "Polarity"])
df.to_csv("data/output.csv", index=False)

print("\n Tweet analysis")
pprint(df)

final_bow = []
for x in bow:
    if x in plist:
        final_bow.append([x, bow[x], "POSITIVE"])
    elif x in nlist:
        final_bow.append([x, bow[x], "NEGATIVE"])

# reference for converting list into dataframe https://thispointer.com/python-pandas-how-to-convert-lists-to-a-dataframe/
# create dataframe based on bow
df_bow = pd.DataFrame.from_records(final_bow,
                                   columns=["Word", "Frequency", "Polarity"])
df_bow.to_csv("data/bog.csv", index=False)

print("\n Bag of Word with frequency and polarity")
pprint(df_bow)
def test_single_model():
    tr_dataset, dt_dataset = get_dataset()
    tr_dataset = tr_dataset[:2]
    dt_dataset = dt_dataset[:2]

    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_dir = Path(tmp_dir)
        config = pt.Trainer.get_config(updates=pb.utils.nested.deflatten({
            'model.factory': Model,
            'storage_dir': str(tmp_dir),
            'stop_trigger': (2, 'epoch'),
            'summary_trigger': (3, 'iteration'),
            'checkpoint_trigger': (2, 'iteration')
        }))
        t = pt.Trainer.from_config(config)
        pre_state_dict = copy.deepcopy(t.state_dict())

        files_before = tuple(tmp_dir.glob('*'))
        if len(files_before) != 0:
            # no event file
            raise Exception(files_before)

        t.register_validation_hook(validation_iterator=dt_dataset,
                                   max_checkpoints=None)

        # Wrap each trigger in each hook with TriggerMock.
        log_list = []
        for hook in t.hooks:
            for k, v in list(hook.__dict__.items()):
                if isinstance(v, pt.train.trigger.Trigger):
                    hook.__dict__[k] = TriggerMock(v, log_list)

        t.train(train_iterator=tr_dataset, resume=False)

        hook_calls = ('\n'.join(log_list))

        # CheckpointedValidationHook trigger is called two times
        # (once for checkpointing once for validation)_file_name
        hook_calls_ref = textwrap.dedent('''
        I:0, E: 0, True, SummaryHook.pre_step
        I:0, E: 0, True, CheckpointHook.pre_step
        I:0, E: 0, True, BackOffValidationHook.pre_step
        I:0, E: 0, False, StopTrainingHook.pre_step
        I:1, E: 0, False, SummaryHook.pre_step
        I:1, E: 0, False, CheckpointHook.pre_step
        I:1, E: 0, False, BackOffValidationHook.pre_step
        I:1, E: 0, False, StopTrainingHook.pre_step
        I:2, E: 1, False, SummaryHook.pre_step
        I:2, E: 1, True, CheckpointHook.pre_step
        I:2, E: 1, True, BackOffValidationHook.pre_step
        I:2, E: 1, False, StopTrainingHook.pre_step
        I:3, E: 1, True, SummaryHook.pre_step
        I:3, E: 1, False, CheckpointHook.pre_step
        I:3, E: 1, False, BackOffValidationHook.pre_step
        I:3, E: 1, False, StopTrainingHook.pre_step
        I:4, E: 2, False, SummaryHook.pre_step
        I:4, E: 2, True, CheckpointHook.pre_step
        I:4, E: 2, True, BackOffValidationHook.pre_step
        I:4, E: 2, True, StopTrainingHook.pre_step
        ''').strip()

        print('#' * 80)
        print(hook_calls)
        print('#' * 80)

        if hook_calls != hook_calls_ref:
            import difflib
            raise AssertionError('\n' + ('\n'.join(difflib.ndiff(
                hook_calls_ref.splitlines(),
                hook_calls.splitlines(),
            ))))

        old_event_files = []

        files_after = tuple(tmp_dir.glob('*'))
        assert len(files_after) == 2, files_after
        for file in sorted(files_after):
            if 'tfevents' in file.name:
                old_event_files.append(file)
                events = list(load_events_as_dict(file))

                tags = []
                time_rel_data_loading = []
                time_rel_train_step = []
                for event in events:
                    if 'summary' in event.keys():
                        value, = event['summary']['value']
                        tags.append(value['tag'])
                        if value['tag'] == 'training_timings/time_rel_data_loading':
                            time_rel_data_loading.append(value['simple_value'])
                        elif value['tag'] == 'training_timings/time_rel_step':
                            time_rel_train_step.append(value['simple_value'])

                c = dict(collections.Counter(tags))

                # Training summary is written two times (at iteration 3 when
                # summary_trigger triggers and when training stops and
                # summary_hook is closed).
                # Validation summary is written when checkpoint_trigger
                # triggers, hence 3 times.
                # non_validation_time can only be measured between
                # validations => 2 values (one fewer than validation_time)
                expect = {
                    'training/grad_norm': 2,
                    'training/grad_norm_': 2,
                    'training/loss': 2,
                    'training/lr/param_group_0': 2,
                    'training_timings/time_per_iteration': 2,
                    'training_timings/time_rel_to_device': 2,
                    'training_timings/time_rel_forward': 2,
                    'training_timings/time_rel_review': 2,
                    'training_timings/time_rel_backward': 2,
                    'training_timings/time_rel_data_loading': 2,
                    'training_timings/time_rel_step': 2,
                    'validation/loss': 3,
                    'validation/lr/param_group_0': 3,
                    'validation_timings/time_per_iteration': 3,
                    'validation_timings/time_rel_to_device': 3,
                    'validation_timings/time_rel_forward': 3,
                    'validation_timings/time_rel_review': 3,
                    'validation_timings/time_rel_backward': 3,
                    'validation_timings/time_rel_data_loading': 3,
                    'validation_timings/time_rel_step': 3,
                    # non validation time can only be measured between
                    # validations:
                    # => # of non_val_time - 1 == # of val_time
                    'validation_timings/non_validation_time': 2,
                    'validation_timings/validation_time': 3,
                }
                pprint(c)
                assert c == expect, c
                assert len(events) == 55, (len(events), events)

                assert len(time_rel_data_loading) > 0, (time_rel_data_loading,
                                                        time_rel_train_step)
                assert len(time_rel_train_step) > 0, (time_rel_data_loading,
                                                      time_rel_train_step)
                np.testing.assert_allclose(
                    np.add(time_rel_data_loading, time_rel_train_step), 1,
                    err_msg=f'{time_rel_data_loading}, {time_rel_train_step})')

            elif file.name == 'checkpoints':
                checkpoints_files = tuple(file.glob('*'))
                assert len(checkpoints_files) == 6, checkpoints_files
                checkpoints_files_name = [f.name for f in checkpoints_files]
                expect = {
                    'ckpt_0.pth', 'ckpt_2.pth', 'ckpt_4.pth',
                    'validation_state.json',
                    'ckpt_best_loss.pth', 'ckpt_latest.pth'
                }
                assert expect == set(checkpoints_files_name), (
                    expect, checkpoints_files_name)
                ckpt_ranking = pb.io.load_json(
                    file / 'validation_state.json')['ckpt_ranking']
                assert ckpt_ranking[0][1] > 0, ckpt_ranking
                for ckpt in ckpt_ranking:
                    ckpt[1] = -1
                expect = [[f'ckpt_{i}.pth', -1] for i in [0, 2, 4]]
                assert ckpt_ranking == expect, (ckpt_ranking, expect)

                for symlink in [
                        file / 'ckpt_latest.pth',
                        file / 'ckpt_best_loss.pth',
                ]:
                    assert symlink.is_symlink(), symlink
                    target = os.readlink(str(symlink))
                    if '/' in target:
                        raise AssertionError(
                            f'The symlink {symlink} contains a "/".\n'
                            f'Expected that the symlink has a ralative target,\n'
                            f'but the target is: {target}')
            else:
                raise ValueError(file)

        post_state_dict = copy.deepcopy(t.state_dict())
        assert pre_state_dict.keys() == post_state_dict.keys()

        equal_amount = {
            key: (pt.utils.to_numpy(parameter_pre) == pt.utils.to_numpy(
                post_state_dict['model'][key])).mean()
            for key, parameter_pre in pre_state_dict['model'].items()
        }

        # ToDo: why are so many weights unchanged? Maybe the zeros in the image?
        assert equal_amount == {'l.bias': 0.0, 'l.weight': 0.6900510204081632}

        import time
        # tfevents use unixtime as unique indicator. Sleep 2 seconds to ensure
        # new value
        time.sleep(2)

        config['stop_trigger'] = (4, 'epoch')
        t = pt.Trainer.from_config(config)
        t.register_validation_hook(validation_iterator=dt_dataset,
                                   max_checkpoints=None)

        log_list = []
        for hook in t.hooks:
            for k, v in list(hook.__dict__.items()):
                if isinstance(v, pt.train.trigger.Trigger):
                    hook.__dict__[k] = TriggerMock(v, log_list)

        t.train(train_iterator=tr_dataset, resume=True)

        hook_calls = ('\n'.join(log_list))
        hook_calls_ref = textwrap.dedent('''
        I:4, E: 2, False, SummaryHook.pre_step
        I:4, E: 2, False, CheckpointHook.pre_step
        I:4, E: 2, False, BackOffValidationHook.pre_step
        I:4, E: 2, False, StopTrainingHook.pre_step
        I:5, E: 2, False, SummaryHook.pre_step
        I:5, E: 2, False, CheckpointHook.pre_step
        I:5, E: 2, False, BackOffValidationHook.pre_step
        I:5, E: 2, False, StopTrainingHook.pre_step
        I:6, E: 3, True, SummaryHook.pre_step
        I:6, E: 3, True, CheckpointHook.pre_step
        I:6, E: 3, True, BackOffValidationHook.pre_step
        I:6, E: 3, False, StopTrainingHook.pre_step
        I:7, E: 3, False, SummaryHook.pre_step
        I:7, E: 3, False, CheckpointHook.pre_step
        I:7, E: 3, False, BackOffValidationHook.pre_step
        I:7, E: 3, False, StopTrainingHook.pre_step
        I:8, E: 4, False, SummaryHook.pre_step
        I:8, E: 4, True, CheckpointHook.pre_step
        I:8, E: 4, True, BackOffValidationHook.pre_step
        I:8, E: 4, True, StopTrainingHook.pre_step
        ''').strip()

        print('#' * 80)
        print(hook_calls)
        print('#' * 80)

        if hook_calls != hook_calls_ref:
            import difflib
            raise AssertionError('\n' + ('\n'.join(difflib.ndiff(
                hook_calls_ref.splitlines(),
                hook_calls.splitlines(),
            ))))

        files_after = tuple(tmp_dir.glob('*'))
        assert len(files_after) == 3, files_after
        for file in sorted(files_after):
            if 'tfevents' in file.name:
                if file in old_event_files:
                    continue

                events = list(load_events_as_dict(file))

                tags = []
                for event in events:
                    if 'summary' in event.keys():
                        value, = event['summary']['value']
                        tags.append(value['tag'])
                c = dict(collections.Counter(tags))
                assert len(events) == 44, (len(events), events)
                expect = {
                    'training/grad_norm': 2,
                    'training/grad_norm_': 2,
                    'training/loss': 2,
                    'training/lr/param_group_0': 2,
                    'training_timings/time_per_iteration': 2,
                    'training_timings/time_rel_to_device': 2,
                    'training_timings/time_rel_forward': 2,
                    'training_timings/time_rel_review': 2,
                    'training_timings/time_rel_backward': 2,
                    'training_timings/time_rel_data_loading': 2,
                    'training_timings/time_rel_step': 2,
                    'validation/loss': 2,
                    'validation/lr/param_group_0': 2,
                    'validation_timings/time_per_iteration': 2,
                    'validation_timings/time_rel_to_device': 2,
                    'validation_timings/time_rel_forward': 2,
                    'validation_timings/time_rel_review': 2,
                    'validation_timings/time_rel_backward': 2,
                    'validation_timings/time_rel_data_loading': 2,
                    'validation_timings/time_rel_step': 2,
                    # non validation time can only be measured between
                    # validations:
                    # => # of non_val_time - 1 == # of val_time
                    'validation_timings/non_validation_time': 1,
                    'validation_timings/validation_time': 2,
                }
                assert c == expect, c
            elif file.name == 'checkpoints':
                checkpoints_files = tuple(file.glob('*'))
                assert len(checkpoints_files) == 8, checkpoints_files
                checkpoints_files_name = [f.name for f in checkpoints_files]
                expect = {
                    *[f'ckpt_{i}.pth' for i in [0, 2, 4, 6, 8]],
                    'validation_state.json',
                    'ckpt_best_loss.pth',
                    'ckpt_latest.pth'
                }
                assert expect == set(checkpoints_files_name), (
                    expect, checkpoints_files_name)
            else:
                raise ValueError(file)
def print_tree(self):
    try:
        from IPython.lib.pretty import pprint
    except ImportError:
        from pprint import pprint
    pprint(self)
def pp_obj_size(self, obj):
    from IPython.lib.pretty import pprint
    pprint(obj)
    print(f'\nObject size is {sys.getsizeof(obj)} bytes')