def test_example_use_with_downloading_a_page(self):
    """Smoke test: fetch every URL in URLS concurrently and dump what was found.

    Opens a shared CommunityFinder session, schedules one find_communities()
    coroutine per URL, gathers them on the event loop, closes the session,
    and prints the combined results.
    """
    CommunityFinder.init_session()
    pending = [
        asyncio.ensure_future(CommunityFinder(url).find_communities())
        for url in URLS
    ]
    results = asyncio.get_event_loop().run_until_complete(asyncio.gather(*pending))
    CommunityFinder.close_session()
    helpers.print_dict(results)
def test_on_coins(self):
    """Offline regression test: re-run community extraction on stored coin data.

    Loads projects from coins.json, feeds each project's raw JSON into a
    CommunityFinder (no network), and compares the freshly-found community
    links ('finded') against the stored ground truth ('community'),
    accumulating a per-project and total error count.
    """
    # Fix: the original used json.load(open(...)) which leaks the file handle;
    # a context manager closes it deterministically.
    with open('coins.json') as fp:
        projects = json.load(fp)
    total_error = 0
    res = []
    for project in projects:
        r = {'id': project['id']}
        # Only projects that carry ground-truth community data are scored.
        if 'community' in project:
            r['community'] = project['community']
            # using without page download: inject the raw JSON directly
            c = CommunityFinder('')
            c.raw_page = json.dumps(project)
            c.find()  # c.data <= here your results
            r['finded'] = c.data['community']
            delta_community = 0  # networks missing entirely from the new result
            delta_count = 0      # per-network difference in number of links
            for x in r['community']:
                # medium_www / whitepaper are deliberately excluded from scoring
                if not (x == 'medium_www' or x == 'whitepaper'):
                    if x not in r['finded']:
                        delta_community += 1
                    else:
                        delta_count += abs(len(r['community'][x]) - len(r['finded'][x]))
            r['delta_community'] = delta_community
            r['delta_count'] = delta_count
            total_error += delta_count + delta_community
            res.append(r)
    print('Errors count:', total_error)
    print('Errors: ')
    helpers.print_dict([x for x in res
                        if x['delta_community'] != 0 or x['delta_count'] != 0])
def sendMessage(dict, mode):
    """Send a hashed ATM request to the bank and act on the response.

    `dict` carries the transaction payload (NOTE(review): shadows the builtin
    `dict`), `mode` is the operation ("new", "get", "deposit", "withdraw", ...).
    Exit codes follow the ATM protocol: 63 for protocol/auth errors and
    timeouts, 255 for a rejected transaction, 0 after account creation.
    NOTE(review): reconstructed from a single collapsed source line; the
    nesting shown here is a best-effort reading — confirm against the original.
    """
    msg = getHashedMessage(key, mode, "atm", dict)
    with socket(family=AF_INET, type=SOCK_STREAM) as sock:
        sock.settimeout(10)
        sock.setsockopt(SOL_SOCKET, SO_REUSEADDR, 1)
        try:
            sock.connect((ip, port))
            # NOTE(review): msg is recomputed here, duplicating the call above.
            msg = getHashedMessage(key, mode, "atm", dict)
            sock.sendall(bytes(json.dumps(msg, sort_keys=True), "utf-8"))
            received = sock.recv(1024).strip()
            try:
                response = json.loads(received.decode("utf-8"))
            except:  # NOTE(review): bare except — any decode failure exits 63
                sys.exit(63)
            tmp = {}
            tmp.update(response)
            # Verify the response hash before trusting any field.
            if isSameHash(key, tmp):
                sender = response["sender"]
                if sender != "bank":
                    exit(63)
                if response["code"] == True:
                    if mode == "new":
                        # Account created: persist the account id to the card file.
                        print_dict(dict)
                        with open(card, "w") as pin:
                            pin.write(dict["account"])
                        sendAck()
                        exit(0)
                    elif mode == "get":
                        sendAck()
                        tmp = {"balance": response["balance"], "account": dict["account"]}
                        print_dict(tmp)
                    else:
                        # deposit / withdraw: echo the request payload back.
                        sendAck()
                        dict[mode] = dict[mode]  # NOTE(review): self-assignment, no effect
                        print_dict(dict)
                # NOTE(review): `received` is bytes, 'False' is str — this
                # comparison can never be true; both branches exit 255 anyway.
                elif received == 'False':
                    exit(255)
                else:
                    exit(255)
            else:
                exit(63)
        except timeout as to:
            # Bank did not answer within 10s — treat as protocol error.
            exit(63)
def handle(self):
    """Handle one ATM request on the bank side (socketserver-style handler).

    Reads a hashed JSON message, validates hash and sender, dispatches on
    `mode` (ack / new / deposit / withdraw / get), replies with a hashed
    result message, and records the action for a later 'ack' confirmation.
    Any validation failure prints "protocol_error" and returns.
    NOTE(review): reconstructed from a collapsed one-line source; the exact
    nesting of the returnType handling is a best-effort reading.
    """
    self.data = None
    try:
        self.data = self.request.recv(1024).strip()
    except socket.timeout:
        print("protocol_error handle1", file=stderr)
        print("protocol_error", flush=True)
        return
    try:
        # NOTE(review): shadows the builtin `dict`.
        dict = json.loads(self.data.decode("utf-8"))
    except:  # NOTE(review): bare except — any parse error is a protocol error
        print("protocol_error handle2", file=stderr)
        print('protocol_error', flush=True)
        return
    tmp = {}
    tmp.update(dict)
    # Authenticate the message before mutating any state.
    if isSameHash(key, tmp):
        sender = dict.pop('sender')
        if sender != "atm":
            print("protocol_error handle3", file=stderr)
            print("protocol_error", flush=True)
            return
        dict.pop('hash')
        mode = dict['mode']
        dict.pop('mode')
        # returnType: -1 unknown mode, -2 ack (no reply), 0 failure,
        # 1 success, 2 success-with-balance.
        returnType = -1
        if mode == 'ack':
            # ATM confirmed the previous action: commit it and clear the slot.
            returnType = -2
            print_dict(Bank.lastAction)
            Bank.lastAction = {}
        else:
            # Snapshot accounts so an unconfirmed action can be rolled back.
            Bank.backupAccounts()
            if mode == "new":
                returnType = Bank.addAccount(dict)
                if returnType:
                    dict['initial_balance'] = dict['initial_balance']  # NOTE(review): no-op
            elif mode == "deposit":
                returnType = Bank.deposit(dict)
                if returnType:
                    dict['deposit'] = dict['deposit']  # NOTE(review): no-op
            elif mode == 'withdraw':
                returnType = Bank.withdraw(dict)
                if returnType:
                    dict['withdraw'] = dict['withdraw']  # NOTE(review): no-op
            elif mode == 'get':
                msg = Bank.get(dict)
                if not msg:
                    returnType = 0
                else:
                    dict['balance'] = msg[dict['account']]
                    returnType = 2
        if returnType == -1:
            print("protocol_error handle4", file=stderr)
            print("protocol_error", flush=True)
        elif returnType != -2:
            # Build the hashed reply for everything except 'ack'.
            if returnType == 1:
                msg = getHashedMessage(key=key, mode="res", sender="bank",
                                       dict={"code": True})
            elif returnType == 0:
                msg = getHashedMessage(key=key, mode="res", sender="bank",
                                       dict={"code": False})
            elif returnType == 2:
                msg = getHashedMessage(key=key, mode="res", sender="bank",
                                       dict={
                                           "code": True,
                                           "balance": msg[dict['account']]
                                       })
            self.wfile.write(
                bytes(json.dumps(msg, sort_keys=True), "utf-8"))
            if returnType > 0:
                # Successful mutation: remember it until the ATM acks.
                Bank.lastAction = dict
    else:
        print("protocol_error handle5", file=stderr)
        print("protocol_error", flush=True)
def confirm():
    """Commit the pending bank action: print it (if any) and reset state."""
    if Bank.lastAction == {}:
        return
    print_dict(Bank.lastAction)
    Bank.lastAction = {}
    Bank.lastAccounts = {}
def main(args):
    """Run Jasper CTC evaluation on a manifest dataset (CUDA required).

    Seeds all RNGs, loads the model definition from TOML, builds the data
    layer / preprocessor / encoder-decoder, optionally restores a checkpoint,
    and calls eval(). NOTE(review): reconstructed from collapsed source lines;
    statement grouping is a best-effort reading.
    """
    # Deterministic runs across python / numpy / torch.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = args.cudnn_benchmark
    assert (args.steps is None or args.steps > 5)
    print("CUDNN BENCHMARK ", args.cudnn_benchmark)
    assert (torch.cuda.is_available())
    if args.fp16:
        optim_level = Optimization.mxprO3
    else:
        optim_level = Optimization.mxprO0
    batch_size = args.batch_size
    jasper_model_definition = toml.load(args.model_toml)
    dataset_vocab = jasper_model_definition['labels']['labels']
    ctc_vocab = add_ctc_labels(dataset_vocab)
    val_manifest = args.val_manifest
    featurizer_config = jasper_model_definition['input_eval']
    featurizer_config["optimization_level"] = optim_level
    if args.max_duration is not None:
        featurizer_config['max_duration'] = args.max_duration
    if args.pad_to is not None:
        # Negative pad_to means "pad every batch to the max duration".
        featurizer_config[
            'pad_to'] = args.pad_to if args.pad_to >= 0 else "max"
    print('model_config')
    print_dict(jasper_model_definition)
    print('feature_config')
    print_dict(featurizer_config)
    data_layer = AudioToTextDataLayer(
        dataset_dir=args.dataset_dir,
        featurizer_config=featurizer_config,
        manifest_filepath=val_manifest,
        labels=dataset_vocab,
        batch_size=batch_size,
        pad_to_max=featurizer_config['pad_to'] == "max",
        shuffle=False,
        multi_gpu=False)
    audio_preprocessor = AudioPreprocessing(**featurizer_config)
    encoderdecoder = JasperEncoderDecoder(
        jasper_model_definition=jasper_model_definition,
        feat_in=1024,
        num_classes=len(ctc_vocab))
    if args.ckpt is not None:
        print("loading model from ", args.ckpt)
        checkpoint = torch.load(args.ckpt, map_location="cpu")
        # Checkpoint stores preprocessor weights under an "audio_preprocessor."
        # prefix; strip it so both modules can load from the same state_dict.
        for k in audio_preprocessor.state_dict().keys():
            checkpoint['state_dict'][k] = checkpoint['state_dict'].pop(
                "audio_preprocessor." + k)
        audio_preprocessor.load_state_dict(checkpoint['state_dict'], strict=False)
        encoderdecoder.load_state_dict(checkpoint['state_dict'], strict=False)
    greedy_decoder = GreedyCTCDecoder()
    # print("Number of parameters in encoder: {0}".format(model.jasper_encoder.num_weights()))
    N = len(data_layer)
    step_per_epoch = math.ceil(N / args.batch_size)
    print('-----------------')
    if args.steps is None:
        print('Have {0} examples to eval on.'.format(N))
        print('Have {0} steps / (gpu * epoch).'.format(step_per_epoch))
    else:
        print('Have {0} examples to eval on.'.format(args.steps * args.batch_size))
        print('Have {0} steps / (gpu * epoch).'.format(args.steps))
    print('-----------------')
    audio_preprocessor.cuda()
    encoderdecoder.cuda()
    if args.fp16:
        # Inference-only AMP: no optimizer is passed.
        encoderdecoder = amp.initialize(
            models=encoderdecoder,
            opt_level=AmpOptimizations[optim_level])
    eval(data_layer=data_layer,
         audio_processor=audio_preprocessor,
         encoderdecoder=encoderdecoder,
         greedy_decoder=greedy_decoder,
         labels=ctc_vocab,
         args=args)
# NOTE(review): this span duplicates the tail of the preceding main() and then
# appends the script entry guard — it looks like an extraction/merge artifact.
# The statements before the guard reference names (args, N, step_per_epoch,
# audio_preprocessor, ...) that do not exist at module scope; confirm against
# the original file. Code tokens left unchanged.
print('-----------------')
if args.steps is None:
    print('Have {0} examples to eval on.'.format(N))
    print('Have {0} steps / (gpu * epoch).'.format(step_per_epoch))
else:
    print('Have {0} examples to eval on.'.format(args.steps * args.batch_size))
    print('Have {0} steps / (gpu * epoch).'.format(args.steps))
print('-----------------')
audio_preprocessor.cuda()
encoderdecoder.cuda()
if args.fp16:
    encoderdecoder = amp.initialize(
        models=encoderdecoder,
        opt_level=AmpOptimizations[optim_level])
eval(data_layer=data_layer,
     audio_processor=audio_preprocessor,
     encoderdecoder=encoderdecoder,
     greedy_decoder=greedy_decoder,
     labels=ctc_vocab,
     args=args)

# Script entry point: parse CLI args, echo them, and run evaluation.
if __name__ == "__main__":
    args = parse_args()
    print_dict(vars(args))
    main(args)
def main(args):
    """Train a Jasper CTC model (optionally distributed over NCCL).

    Seeds RNGs, initializes torch.distributed when --local_rank is set,
    builds train/eval data layers from the TOML model definition, restores
    an optional checkpoint, wires up Novograd/AdamW + AMP, and calls train().
    NOTE(review): reconstructed from collapsed source lines; grouping is a
    best-effort reading.
    """
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    assert(torch.cuda.is_available())
    torch.backends.cudnn.benchmark = args.cudnn
    # set up distributed training
    if args.local_rank is not None:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
    multi_gpu = torch.distributed.is_initialized()
    if multi_gpu:
        print_once("DISTRIBUTED TRAINING with {} gpus".format(torch.distributed.get_world_size()))
    # define amp optimization level (O1 mixed precision vs O0 fp32)
    if args.fp16:
        optim_level = Optimization.mxprO1
    else:
        optim_level = Optimization.mxprO0
    jasper_model_definition = toml.load(args.model_toml)
    dataset_vocab = jasper_model_definition['labels']['labels']
    ctc_vocab = add_ctc_labels(dataset_vocab)
    train_manifest = args.train_manifest
    val_manifest = args.val_manifest
    featurizer_config = jasper_model_definition['input']
    featurizer_config_eval = jasper_model_definition['input_eval']
    featurizer_config["optimization_level"] = optim_level
    featurizer_config_eval["optimization_level"] = optim_level
    sampler_type = featurizer_config.get("sampler", 'default')
    perturb_config = jasper_model_definition.get('perturb', None)
    if args.pad_to_max:
        assert(args.max_duration > 0)
        featurizer_config['max_duration'] = args.max_duration
        featurizer_config_eval['max_duration'] = args.max_duration
        featurizer_config['pad_to'] = "max"
        featurizer_config_eval['pad_to'] = "max"
    print_once('model_config')
    print_dict(jasper_model_definition)
    if args.gradient_accumulation_steps < 1:
        raise ValueError('Invalid gradient accumulation steps parameter {}'.format(args.gradient_accumulation_steps))
    if args.batch_size % args.gradient_accumulation_steps != 0:
        raise ValueError('gradient accumulation step {} is not divisible by batch size {}'.format(args.gradient_accumulation_steps, args.batch_size))
    # The train loader uses the micro-batch (batch / accumulation steps).
    data_layer = AudioToTextDataLayer(
        dataset_dir=args.dataset_dir,
        featurizer_config=featurizer_config,
        perturb_config=perturb_config,
        manifest_filepath=train_manifest,
        labels=dataset_vocab,
        batch_size=args.batch_size // args.gradient_accumulation_steps,
        multi_gpu=multi_gpu,
        pad_to_max=args.pad_to_max,
        sampler=sampler_type)
    data_layer_eval = AudioToTextDataLayer(
        dataset_dir=args.dataset_dir,
        featurizer_config=featurizer_config_eval,
        manifest_filepath=val_manifest,
        labels=dataset_vocab,
        batch_size=args.batch_size,
        multi_gpu=multi_gpu,
        pad_to_max=args.pad_to_max
    )
    model = Jasper(feature_config=featurizer_config,
                   jasper_model_definition=jasper_model_definition,
                   feat_in=1024,
                   num_classes=len(ctc_vocab))
    if args.ckpt is not None:
        # Resume: model weights now, optimizer state after amp.initialize below.
        print_once("loading model from {}".format(args.ckpt))
        checkpoint = torch.load(args.ckpt, map_location="cpu")
        model.load_state_dict(checkpoint['state_dict'], strict=True)
        args.start_epoch = checkpoint['epoch']
    else:
        args.start_epoch = 0
    ctc_loss = CTCLossNM(
        num_classes=len(ctc_vocab))
    greedy_decoder = GreedyCTCDecoder()
    print_once("Number of parameters in encoder: {0}".format(model.jasper_encoder.num_weights()))
    print_once("Number of parameters in decode: {0}".format(model.jasper_decoder.num_weights()))
    N = len(data_layer)
    if sampler_type == 'default':
        args.step_per_epoch = math.ceil(N / (args.batch_size * (1 if not torch.distributed.is_initialized() else torch.distributed.get_world_size())))
    elif sampler_type == 'bucket':
        args.step_per_epoch = int(len(data_layer.sampler) / args.batch_size )
    print_once('-----------------')
    print_once('Have {0} examples to train on.'.format(N))
    print_once('Have {0} steps / (gpu * epoch).'.format(args.step_per_epoch))
    print_once('-----------------')
    fn_lr_policy = lambda s: lr_policy(args.lr, s, args.num_epochs * args.step_per_epoch)
    model.cuda()
    if args.optimizer_kind == "novograd":
        optimizer = Novograd(model.parameters(),
                             lr=args.lr,
                             weight_decay=args.weight_decay)
    elif args.optimizer_kind == "adam":
        optimizer = AdamW(model.parameters(),
                          lr=args.lr,
                          weight_decay=args.weight_decay)
    else:
        raise ValueError("invalid optimizer choice: {}".format(args.optimizer_kind))
    if optim_level in AmpOptimizations:
        model, optimizer = amp.initialize(
            #lnw block for error
            #min_loss_scale=1.0,
            models=model,
            optimizers=optimizer,
            opt_level=AmpOptimizations[optim_level])
    if args.ckpt is not None:
        optimizer.load_state_dict(checkpoint['optimizer'])
    model = model_multi_gpu(model, multi_gpu)
    train(
        data_layer=data_layer,
        data_layer_eval=data_layer_eval,
        model=model,
        ctc_loss=ctc_loss,
        greedy_decoder=greedy_decoder,
        optimizer=optimizer,
        labels=ctc_vocab,
        optim_level=optim_level,
        multi_gpu=multi_gpu,
        fn_lr_policy=fn_lr_policy if args.lr_decay else None,
        args=args)
def main(args):
    """Evaluate an RNN-T model on CPU, optionally optimized with Intel IPEX.

    Builds the data layer / preprocessor / RNNT model from the TOML
    definition, optionally applies ipex.optimize + TorchScript tracing of the
    joint net, then runs eval(). NOTE(review): reconstructed from collapsed
    source lines; grouping is a best-effort reading.
    """
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = args.cudnn_benchmark
    multi_gpu = args.local_rank is not None
    if multi_gpu:
        print("DISTRIBUTED with ", torch.distributed.get_world_size())
    if args.fp16:
        optim_level = Optimization.mxprO3
    else:
        optim_level = Optimization.mxprO0
    model_definition = toml.load(args.model_toml)
    dataset_vocab = model_definition['labels']['labels']
    ctc_vocab = add_blank_label(dataset_vocab)
    val_manifest = args.val_manifest
    featurizer_config = model_definition['input_eval']
    featurizer_config["optimization_level"] = optim_level
    if args.max_duration is not None:
        featurizer_config['max_duration'] = args.max_duration
    if args.pad_to is not None:
        featurizer_config['pad_to'] = args.pad_to if args.pad_to >= 0 else "max"
    print('model_config')
    print_dict(model_definition)
    print('feature_config')
    print_dict(featurizer_config)
    data_layer = None
    # Only build a dataset loader when evaluating a manifest (not a single wav).
    if args.wav is None:
        data_layer = AudioToTextDataLayer(
            dataset_dir=args.dataset_dir,
            featurizer_config=featurizer_config,
            manifest_filepath=val_manifest,
            # sampler='bucket',
            sort_by_duration=args.sort_by_duration,
            labels=dataset_vocab,
            batch_size=args.batch_size,
            pad_to_max=featurizer_config['pad_to'] == "max",
            shuffle=False,
            multi_gpu=multi_gpu)
    audio_preprocessor = AudioPreprocessing(**featurizer_config)
    #encoderdecoder = JasperEncoderDecoder(jasper_model_definition=jasper_model_definition, feat_in=1024, num_classes=len(ctc_vocab))
    model = RNNT(
        feature_config=featurizer_config,
        rnnt=model_definition['rnnt'],
        num_classes=len(ctc_vocab)
    )
    if args.ckpt is not None:
        print("loading model from ", args.ckpt)
        checkpoint = torch.load(args.ckpt, map_location="cpu")
        model.load_state_dict(checkpoint['state_dict'], strict=False)
    if args.ipex:
        # IPEX path: optimize the joint net, cast the embedding, and
        # optionally jit-trace/freeze the joint net for inference.
        import intel_extension_for_pytorch as ipex
        from rnn import IPEXStackTime
        model.joint_net.eval()
        data_type = torch.bfloat16 if args.mix_precision else torch.float32
        if model.encoder["stack_time"].factor == 2:
            model.encoder["stack_time"] = IPEXStackTime(model.encoder["stack_time"].factor)
        model.joint_net = ipex.optimize(model.joint_net, dtype=data_type, auto_kernel_selection=True)
        model.prediction["embed"] = model.prediction["embed"].to(data_type)
        if args.jit:
            print("running jit path")
            model.joint_net.eval()
            # Trace with a dummy joint-net input of shape
            # (batch, 1, 1, encoder_n_hidden + pred_n_hidden).
            if args.mix_precision:
                with torch.cpu.amp.autocast(), torch.no_grad():
                    model.joint_net = torch.jit.trace(model.joint_net, torch.randn(args.batch_size, 1, 1, model_definition['rnnt']['encoder_n_hidden'] + model_definition['rnnt']['pred_n_hidden']), check_trace=False)
            else:
                with torch.no_grad():
                    model.joint_net = torch.jit.trace(model.joint_net, torch.randn(args.batch_size, 1, 1, model_definition['rnnt']['encoder_n_hidden'] + model_definition['rnnt']['pred_n_hidden']), check_trace=False)
            model.joint_net = torch.jit.freeze(model.joint_net)
    else:
        model = model.to("cpu")
    #greedy_decoder = GreedyCTCDecoder()
    # print("Number of parameters in encoder: {0}".format(model.jasper_encoder.num_weights()))
    if args.wav is None:
        N = len(data_layer)
        # step_per_epoch = math.ceil(N / (args.batch_size * (1 if not torch.distributed.is_available() else torch.distributed.get_world_size())))
        step_per_epoch = math.ceil(N / (args.batch_size * (1 if not torch.distributed.is_initialized() else torch.distributed.get_world_size())))
        if args.steps is not None:
            print('-----------------')
            # print('Have {0} examples to eval on.'.format(args.steps * args.batch_size * (1 if not torch.distributed.is_available() else torch.distributed.get_world_size())))
            print('Have {0} examples to eval on.'.format(args.steps * args.batch_size * (1 if not torch.distributed.is_initialized() else torch.distributed.get_world_size())))
            print('Have {0} warm up steps / (gpu * epoch).'.format(args.warm_up))
            print('Have {0} measure steps / (gpu * epoch).'.format(args.steps))
            print('-----------------')
        else:
            print('-----------------')
            print('Have {0} examples to eval on.'.format(N))
            print('Have {0} warm up steps / (gpu * epoch).'.format(args.warm_up))
            print('Have {0} measure steps / (gpu * epoch).'.format(step_per_epoch))
            print('-----------------')
    else:
        # Single-wav path: normalize features per feature channel.
        audio_preprocessor.featurizer.normalize = "per_feature"
    print ("audio_preprocessor.normalize: ", audio_preprocessor.featurizer.normalize)
    audio_preprocessor.eval()
    # eval_transforms = torchvision.transforms.Compose([
    #     lambda xs: [x.to(ipex.DEVICE) if args.ipex else x.cpu() for x in xs],
    #     lambda xs: [*audio_preprocessor(xs[0:2]), *xs[2:]],
    #     lambda xs: [xs[0].permute(2, 0, 1), *xs[1:]],
    # ])
    # Pipeline: move to CPU -> featurize -> permute BxFxT to TxBxF.
    eval_transforms = torchvision.transforms.Compose([
        lambda xs: [x.cpu() for x in xs],
        lambda xs: [*audio_preprocessor(xs[0:2]), *xs[2:]],
        lambda xs: [xs[0].permute(2, 0, 1), *xs[1:]],
    ])
    model.eval()
    if args.ipex:
        ipex.nn.utils._model_convert.replace_lstm_with_ipex_lstm(model)
    greedy_decoder = RNNTGreedyDecoder(len(ctc_vocab) - 1, model.module if multi_gpu else model)
    eval(
        data_layer=data_layer,
        audio_processor=eval_transforms,
        encoderdecoder=model,
        greedy_decoder=greedy_decoder,
        labels=ctc_vocab,
        args=args,
        multi_gpu=multi_gpu)
def main(args):
    """Evaluate a Jasper CTC model, on GPU or CPU (--cpu_run).

    Supports toggling the encoder's conv masking (--masked_fill), optional
    distributed setup via --local_rank, checkpoint restore with
    preprocessor-prefix stripping, and AMP for fp16. NOTE(review):
    reconstructed from collapsed source lines; grouping is a best-effort
    reading.
    """
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = args.cudnn_benchmark
    print("CUDNN BENCHMARK ", args.cudnn_benchmark)
    if not args.cpu_run:
        assert(torch.cuda.is_available())
        if args.local_rank is not None:
            torch.cuda.set_device(args.local_rank)
            torch.distributed.init_process_group(backend='nccl', init_method='env://')
    multi_gpu = args.local_rank is not None
    if multi_gpu:
        print("DISTRIBUTED with ", torch.distributed.get_world_size())
    # NOTE(review): raw ints here (3/0) instead of the Optimization enum used
    # by the sibling main() functions — confirm AmpOptimizations accepts both.
    if args.fp16:
        optim_level = 3
    else:
        optim_level = 0
    jasper_model_definition = toml.load(args.model_toml)
    dataset_vocab = jasper_model_definition['labels']['labels']
    ctc_vocab = add_ctc_labels(dataset_vocab)
    val_manifest = args.val_manifest
    featurizer_config = jasper_model_definition['input_eval']
    featurizer_config["optimization_level"] = optim_level
    featurizer_config["fp16"] = args.fp16
    args.use_conv_mask = jasper_model_definition['encoder'].get('convmask', True)
    if args.masked_fill is not None:
        print("{} masked_fill".format("Enabling" if args.masked_fill else "Disabling"))
        jasper_model_definition["encoder"]["conv_mask"] = args.masked_fill
    if args.max_duration is not None:
        featurizer_config['max_duration'] = args.max_duration
    if args.pad_to is not None:
        featurizer_config['pad_to'] = args.pad_to
    # This variant encodes "pad to max" as -1 rather than the string "max".
    if featurizer_config['pad_to'] == "max":
        featurizer_config['pad_to'] = -1
    print('=== model_config ===')
    print_dict(jasper_model_definition)
    print()
    print('=== feature_config ===')
    print_dict(featurizer_config)
    print()
    data_layer = None
    if args.wav is None:
        data_layer = AudioToTextDataLayer(
            dataset_dir=args.dataset_dir,
            featurizer_config=featurizer_config,
            manifest_filepath=val_manifest,
            labels=dataset_vocab,
            batch_size=args.batch_size,
            pad_to_max=featurizer_config['pad_to'] == -1,
            shuffle=False,
            multi_gpu=multi_gpu)
    audio_preprocessor = AudioPreprocessing(**featurizer_config)
    encoderdecoder = JasperEncoderDecoder(jasper_model_definition=jasper_model_definition,
                                          feat_in=1024,
                                          num_classes=len(ctc_vocab))
    if args.ckpt is not None:
        print("loading model from ", args.ckpt)
        # NOTE(review): a directory checkpoint path exits silently with 0 —
        # presumably unsupported here; confirm intent.
        if os.path.isdir(args.ckpt):
            exit(0)
        else:
            checkpoint = torch.load(args.ckpt, map_location="cpu")
            # Strip the "audio_preprocessor." prefix so both modules load
            # from the same state_dict.
            for k in audio_preprocessor.state_dict().keys():
                checkpoint['state_dict'][k] = checkpoint['state_dict'].pop("audio_preprocessor." + k)
            audio_preprocessor.load_state_dict(checkpoint['state_dict'], strict=False)
            encoderdecoder.load_state_dict(checkpoint['state_dict'], strict=False)
    greedy_decoder = GreedyCTCDecoder()
    # print("Number of parameters in encoder: {0}".format(model.jasper_encoder.num_weights()))
    if args.wav is None:
        N = len(data_layer)
        step_per_epoch = math.ceil(N / (args.batch_size * (1 if not torch.distributed.is_initialized() else torch.distributed.get_world_size())))
        if args.steps is not None:
            print('-----------------')
            print('Have {0} examples to eval on.'.format(args.steps * args.batch_size * (1 if not torch.distributed.is_initialized() else torch.distributed.get_world_size())))
            print('Have {0} steps / (gpu * epoch).'.format(args.steps))
            print('-----------------')
        else:
            print('-----------------')
            print('Have {0} examples to eval on.'.format(N))
            print('Have {0} steps / (gpu * epoch).'.format(step_per_epoch))
            print('-----------------')
    print ("audio_preprocessor.normalize: ", audio_preprocessor.featurizer.normalize)
    if not args.cpu_run:
        audio_preprocessor.cuda()
        encoderdecoder.cuda()
        if args.fp16:
            encoderdecoder = amp.initialize(
                models=encoderdecoder,
                opt_level=AmpOptimizations[optim_level])
        encoderdecoder = model_multi_gpu(encoderdecoder, multi_gpu)
    audio_preprocessor.eval()
    encoderdecoder.eval()
    greedy_decoder.eval()
    eval(
        data_layer=data_layer,
        audio_processor=audio_preprocessor,
        encoderdecoder=encoderdecoder,
        greedy_decoder=greedy_decoder,
        labels=ctc_vocab,
        args=args,
        multi_gpu=multi_gpu)
def run_strategy(strategy, instruments=None):
    """Backtest `strategy` on the given OANDA instruments with backtrader.

    Fetches H4 candles for 2018 for each instrument, runs cerebro with a CSV
    writer and trade/SQN/Sharpe/drawdown analyzers, prints the results, and
    plots candlestick charts.

    Args:
        strategy: a backtrader Strategy class to add to cerebro.
        instruments: iterable of instrument names; defaults to ["AUD_USD"].
    """
    # Fix: the original used a mutable default argument (instruments=["AUD_USD"]);
    # a None sentinel keeps the same call-compatible behavior without sharing
    # a list across calls.
    if instruments is None:
        instruments = ["AUD_USD"]
    session_id = ''.join([str(random.randint(0, 9)) for _ in range(4)])
    timestamp = datetime.datetime.strftime(datetime.datetime.now(), '%Y%m%d%H%M%S')
    # parse arguments
    args = parse_args()
    # Create a cerebro entity
    cerebro = bt.Cerebro()
    cerebro.addwriter(bt.WriterFile, out=f'output/test_{timestamp}.csv',
                      csv=True, rounding=2)
    # OANDA instrument-factory request window: 2018, H4 granularity.
    params = {
        "from": "2018-01-01T00:00:00Z",
        "granularity": "H4",
        "to": "2019-01-01T00:00:00Z"
    }
    # Pass each instrument's candles to a backtrader datafeed (4-hour bars).
    data = []
    for i, instrument in enumerate(instruments):
        df = get_historical_data_factory(instrument, params)
        data.append(
            bt.feeds.PandasData(dataname=df,
                                timeframe=bt.TimeFrame.Minutes,
                                compression=240))
        cerebro.adddata(data[i], name=instrument)
    # Set our desired cash start
    cerebro.broker.setcash(100000.0)
    cerebro.broker.set_shortcash(False)
    # Add a strategy
    cerebro.addstrategy(strategy)
    # Set the commission - 0.1% ... divide by 100 to remove the %
    cerebro.broker.setcommission(commission=0.001, leverage=1000_000)
    # Print out the starting conditions
    print('Starting Portfolio Value: %.2f' % cerebro.broker.getvalue())
    # Add the analyzers we are interested in
    cerebro.addanalyzer(bt.analyzers.TradeAnalyzer, _name="ta")
    cerebro.addanalyzer(bt.analyzers.SQN, _name="sqn")
    cerebro.addanalyzer(bt.analyzers.SharpeRatio, _name="sharpe")
    cerebro.addanalyzer(bt.analyzers.DrawDown, _name="draw_down")
    # Run over everything
    strategies = cerebro.run()
    first_strategy = strategies[0]
    # Print the analyzers; analyzer output is best-effort, so failures are
    # reported but do not abort the run.
    try:
        print_trade_analysis(first_strategy.analyzers.ta.get_analysis())
        print_sharpe_ratio(first_strategy.analyzers.sharpe.get_analysis())
        print_sqn(first_strategy.analyzers.sqn.get_analysis())
        print_dict(first_strategy.analyzers.draw_down.get_analysis())
    except Exception as e:
        print(e)
    # Get final portfolio Value
    portfolio_value = cerebro.broker.getvalue()
    print(f'Final Portfolio Value: ${portfolio_value:.2f}')
    # Plot styling
    plt.style.use('tableau-colorblind10')
    plt.rc('grid', color='k', linestyle='-', alpha=0.1)
    plt.rc('legend', loc='best')
    plot_args = dict(
        style='candlestick',
        legenddataloc='upper right',
        grid=True,
        # Format string for the display of ticks on the x axis
        fmt_x_ticks='%Y-%b-%d %H:%M',
        # Format string for the display of data points values
        fmt_x_data='%Y-%b-%d %H:%M',
        subplot=True,
        dpi=900,
        numfigs=1,
        iplot=False)
    # Separate plot by data feed (if there is more than one): for each figure,
    # enable exactly one feed's plotinfo.
    # NOTE(review): if one figure per feed is intended, cerebro.plot() would
    # need to be called inside the outer loop — confirm against the original.
    if len(first_strategy.datas) > 1:
        for i in range(len(first_strategy.datas)):
            for j, d in enumerate(first_strategy.datas):
                d.plotinfo.plot = i == j  # only one data feed to be plot. others = False
    cerebro.plot(**plot_args)
def main(args):
    """Evaluate an RNN-T model on CPU or CUDA (--cuda flag).

    Builds the data layer / preprocessor / RNNT model from the TOML
    definition, optionally restores a checkpoint, composes the feature
    pipeline as torchvision transforms, and calls eval(). NOTE(review):
    reconstructed from collapsed source lines; grouping is a best-effort
    reading.
    """
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = args.cudnn_benchmark
    print("CUDNN BENCHMARK ", args.cudnn_benchmark)
    if args.cuda:
        assert (torch.cuda.is_available())
    model_definition = toml.load(args.model_toml)
    dataset_vocab = model_definition['labels']['labels']
    ctc_vocab = add_blank_label(dataset_vocab)
    val_manifest = args.val_manifest
    featurizer_config = model_definition['input_eval']
    if args.pad_to is not None:
        # Negative pad_to means "pad every batch to the max duration".
        featurizer_config[
            'pad_to'] = args.pad_to if args.pad_to >= 0 else "max"
    print('model_config')
    print_dict(model_definition)
    print('feature_config')
    print_dict(featurizer_config)
    data_layer = None
    data_layer = AudioToTextDataLayer(
        dataset_dir=args.dataset_dir,
        featurizer_config=featurizer_config,
        manifest_filepath=val_manifest,
        labels=dataset_vocab,
        batch_size=args.batch_size,
        pad_to_max=featurizer_config['pad_to'] == "max",
        shuffle=False)
    audio_preprocessor = AudioPreprocessing(**featurizer_config)
    model = RNNT(feature_config=featurizer_config,
                 rnnt=model_definition['rnnt'],
                 num_classes=len(ctc_vocab))
    if args.ckpt is not None:
        print("loading model from ", args.ckpt)
        checkpoint = torch.load(args.ckpt, map_location="cpu")
        model.load_state_dict(checkpoint['state_dict'], strict=False)
    # model = torch.jit.script(model)
    audio_preprocessor.featurizer.normalize = "per_feature"
    if args.cuda:
        audio_preprocessor.cuda()
    audio_preprocessor.eval()
    # Feature pipeline: (optional cuda move) -> featurize -> permute
    # BxFxT to TxBxF for the decoder.
    eval_transforms = []
    if args.cuda:
        eval_transforms.append(lambda xs: [x.cuda() for x in xs])
    eval_transforms.append(lambda xs: [*audio_preprocessor(xs[0:2]), *xs[2:]])
    # These are just some very confusing transposes, that's all.
    # BxFxT -> TxBxF
    eval_transforms.append(lambda xs: [xs[0].permute(2, 0, 1), *xs[1:]])
    eval_transforms = torchvision.transforms.Compose(eval_transforms)
    if args.cuda:
        model.cuda()
    # Ideally, I would jit this as well... But this is just the constructor...
    greedy_decoder = RNNTGreedyDecoder(len(ctc_vocab) - 1, model)
    eval(data_layer=data_layer,
         audio_processor=eval_transforms,
         encoderdecoder=model,
         greedy_decoder=greedy_decoder,
         labels=ctc_vocab,
         args=args)
def main(args):
    """Evaluate an RNN-T model on CUDA, optionally distributed / fp16 (AMP).

    Seeds RNGs, sets up NCCL when --local_rank is given, builds the data
    layer / preprocessor / RNNT model from the TOML definition, restores an
    optional checkpoint, and calls eval(). NOTE(review): reconstructed from
    collapsed source lines; grouping is a best-effort reading.
    """
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = args.cudnn_benchmark
    print("CUDNN BENCHMARK ", args.cudnn_benchmark)
    assert(torch.cuda.is_available())
    if args.local_rank is not None:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
    multi_gpu = args.local_rank is not None
    if multi_gpu:
        print("DISTRIBUTED with ", torch.distributed.get_world_size())
    if args.fp16:
        optim_level = Optimization.mxprO3
    else:
        optim_level = Optimization.mxprO0
    model_definition = toml.load(args.model_toml)
    dataset_vocab = model_definition['labels']['labels']
    ctc_vocab = add_blank_label(dataset_vocab)
    val_manifest = args.val_manifest
    featurizer_config = model_definition['input_eval']
    featurizer_config["optimization_level"] = optim_level
    if args.max_duration is not None:
        featurizer_config['max_duration'] = args.max_duration
    if args.pad_to is not None:
        featurizer_config['pad_to'] = args.pad_to if args.pad_to >= 0 else "max"
    print('model_config')
    print_dict(model_definition)
    print('feature_config')
    print_dict(featurizer_config)
    data_layer = None
    # Only build a dataset loader when evaluating a manifest (not a single wav).
    if args.wav is None:
        data_layer = AudioToTextDataLayer(
            dataset_dir=args.dataset_dir,
            featurizer_config=featurizer_config,
            manifest_filepath=val_manifest,
            labels=dataset_vocab,
            batch_size=args.batch_size,
            pad_to_max=featurizer_config['pad_to'] == "max",
            shuffle=False,
            multi_gpu=multi_gpu)
    audio_preprocessor = AudioPreprocessing(**featurizer_config)
    #encoderdecoder = JasperEncoderDecoder(jasper_model_definition=jasper_model_definition, feat_in=1024, num_classes=len(ctc_vocab))
    model = RNNT(
        feature_config=featurizer_config,
        rnnt=model_definition['rnnt'],
        num_classes=len(ctc_vocab)
    )
    if args.ckpt is not None:
        print("loading model from ", args.ckpt)
        checkpoint = torch.load(args.ckpt, map_location="cpu")
        model.load_state_dict(checkpoint['state_dict'], strict=False)
    #greedy_decoder = GreedyCTCDecoder()
    # print("Number of parameters in encoder: {0}".format(model.jasper_encoder.num_weights()))
    if args.wav is None:
        N = len(data_layer)
        step_per_epoch = math.ceil(N / (args.batch_size * (1 if not torch.distributed.is_initialized() else torch.distributed.get_world_size())))
        if args.steps is not None:
            print('-----------------')
            print('Have {0} examples to eval on.'.format(args.steps * args.batch_size * (1 if not torch.distributed.is_initialized() else torch.distributed.get_world_size())))
            print('Have {0} steps / (gpu * epoch).'.format(args.steps))
            print('-----------------')
        else:
            print('-----------------')
            print('Have {0} examples to eval on.'.format(N))
            print('Have {0} steps / (gpu * epoch).'.format(step_per_epoch))
            print('-----------------')
    else:
        # Single-wav path: normalize features per feature channel.
        audio_preprocessor.featurizer.normalize = "per_feature"
    print ("audio_preprocessor.normalize: ", audio_preprocessor.featurizer.normalize)
    audio_preprocessor.cuda()
    audio_preprocessor.eval()
    # Pipeline: move to GPU -> featurize -> permute BxFxT to TxBxF.
    eval_transforms = torchvision.transforms.Compose([
        lambda xs: [x.cuda() for x in xs],
        lambda xs: [*audio_preprocessor(xs[0:2]), *xs[2:]],
        lambda xs: [xs[0].permute(2, 0, 1), *xs[1:]],
    ])
    model.cuda()
    if args.fp16:
        # Inference-only AMP: no optimizer is passed.
        model = amp.initialize(
            models=model,
            opt_level=AmpOptimizations[optim_level])
    model = model_multi_gpu(model, multi_gpu)
    greedy_decoder = RNNTGreedyDecoder(len(ctc_vocab) - 1, model.module if multi_gpu else model)
    eval(
        data_layer=data_layer,
        audio_processor=eval_transforms,
        encoderdecoder=model,
        greedy_decoder=greedy_decoder,
        labels=ctc_vocab,
        args=args,
        multi_gpu=multi_gpu)
def main(args):
    """Train an RNN-T model on CPU (optionally distributed via oneCCL/DDP, optionally IPEX-optimized).

    Seeds RNGs, bootstraps the process group from PMI_* environment
    variables, builds train/eval data layers and transforms from a TOML
    model definition, constructs model/loss/optimizer/LR policy, and
    hands everything to train().

    Args:
        args: parsed CLI namespace; fields used here include seed,
            local_rank, master_addr, port, backend, model_toml,
            train/val/tst manifests, pad_to_max, max_duration,
            gradient_accumulation_steps, batch_size, eval_batch_size,
            cuda, ckpt, lr, lr_decay, lr_warmup, num_epochs,
            optimizer_kind, weight_decay, ipex, bf16, tb_path, histogram.
    """
    # Seed every RNG source for reproducible training.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # NOTE(review): os.environ.get returns a *string* when LOCAL_RANK is
    # set, so the later `args.local_rank == 0` comparison may never be
    # true under a launcher — confirm whether an int() cast is intended.
    args.local_rank = os.environ.get('LOCAL_RANK', args.local_rank)
    # set up distributed training
    cpu_distributed_training = False
    # PMI_* variables come from an MPI-style launcher; translate them to
    # the RANK/WORLD_SIZE names torch.distributed expects.
    if torch.distributed.is_available() and int(os.environ.get('PMI_SIZE', '0')) > 1:
        print('Distributed training with DDP')
        os.environ['RANK'] = os.environ.get('PMI_RANK', '0')
        os.environ['WORLD_SIZE'] = os.environ.get('PMI_SIZE', '1')
        if not 'MASTER_ADDR' in os.environ:
            os.environ['MASTER_ADDR'] = args.master_addr
        if not 'MASTER_PORT' in os.environ:
            os.environ['MASTER_PORT'] = args.port

        # Initialize the process group with ccl backend
        if args.backend == 'ccl':
            # Importing torch_ccl registers the 'ccl' backend with torch.
            import torch_ccl
        dist.init_process_group(
                backend=args.backend
        )
        cpu_distributed_training = True

    if torch.distributed.is_initialized():
        print("Torch distributed is initialized.")
        args.rank = torch.distributed.get_rank()
        args.world_size = torch.distributed.get_world_size()
    else:
        print("Torch distributed is not initialized.")
        args.rank = 0
        args.world_size = 1

    # GPU data parallelism is disabled in this CPU-oriented script; the
    # branch below is intentionally dead but kept for parity with the
    # GPU variant.
    multi_gpu = False
    if multi_gpu:
        print_once("DISTRIBUTED TRAINING with {} gpus".format(torch.distributed.get_world_size()))

    # No apex mixed precision here; bf16 is handled later via IPEX.
    optim_level = Optimization.mxprO0

    model_definition = toml.load(args.model_toml)
    dataset_vocab = model_definition['labels']['labels']
    # RNN-T decoding needs an extra blank symbol appended to the vocab.
    ctc_vocab = add_blank_label(dataset_vocab)

    train_manifest = args.train_manifest
    val_manifest = args.val_manifest
    tst_manifest = args.tst_manifest
    featurizer_config = model_definition['input']
    featurizer_config_eval = model_definition['input_eval']
    featurizer_config["optimization_level"] = optim_level
    featurizer_config_eval["optimization_level"] = optim_level

    sampler_type = featurizer_config.get("sampler", 'default')
    perturb_config = model_definition.get('perturb', None)

    # Static padding: every utterance padded to max_duration, for both
    # train and eval featurizers.
    if args.pad_to_max:
        assert(args.max_duration > 0)
        featurizer_config['max_duration'] = args.max_duration
        featurizer_config_eval['max_duration'] = args.max_duration
        featurizer_config['pad_to'] = "max"
        featurizer_config_eval['pad_to'] = "max"

    print_once('model_config')
    print_dict(model_definition)

    # Per-step micro-batch is batch_size / accumulation_steps, so both
    # values must be valid and compatible.
    if args.gradient_accumulation_steps < 1:
        raise ValueError('Invalid gradient accumulation steps parameter {}'.format(args.gradient_accumulation_steps))
    if args.batch_size % args.gradient_accumulation_steps != 0:
        raise ValueError('gradient accumulation step {} is not divisible by batch size {}'.format(args.gradient_accumulation_steps, args.batch_size))

    preprocessor = preprocessing.AudioPreprocessing(**featurizer_config)
    if args.cuda:
        preprocessor.cuda()
    else:
        preprocessor.cpu()

    augmentations = preprocessing.SpectrogramAugmentation(**featurizer_config)
    if args.cuda:
        augmentations.cuda()
    else:
        augmentations.cpu()

    # Training pipeline per batch: to CPU, featurize (audio, audio_len),
    # spectrogram augmentation, then permute to (time, batch, freq).
    train_transforms = torchvision.transforms.Compose([
        lambda xs: [x.cpu() for x in xs],
        lambda xs: [*preprocessor(xs[0:2]), *xs[2:]],
        lambda xs: [augmentations(xs[0]), *xs[1:]],
        lambda xs: [xs[0].permute(2, 0, 1), *xs[1:]],
    ])

    # Same as train_transforms minus augmentation.
    eval_transforms = torchvision.transforms.Compose([
        lambda xs: [x.cpu() for x in xs],
        lambda xs: [*preprocessor(xs[0:2]), *xs[2:]],
        lambda xs: [xs[0].permute(2, 0, 1), *xs[1:]],
    ])

    data_layer = AudioToTextDataLayer(
        dataset_dir=args.dataset_dir,
        featurizer_config=featurizer_config,
        perturb_config=perturb_config,
        manifest_filepath=train_manifest,
        labels=dataset_vocab,
        batch_size=args.batch_size // args.gradient_accumulation_steps,
        multi_gpu=multi_gpu,
        pad_to_max=args.pad_to_max,
        sampler=sampler_type,
        cpu_distributed_training=cpu_distributed_training)

    # Each eval dataset is (data_layer, run-frequency, display name).
    eval_datasets = [(
        AudioToTextDataLayer(
            dataset_dir=args.dataset_dir,
            featurizer_config=featurizer_config_eval,
            manifest_filepath=val_manifest,
            labels=dataset_vocab,
            batch_size=args.eval_batch_size,
            multi_gpu=multi_gpu,
            pad_to_max=args.pad_to_max
        ),
        args.eval_frequency,
        'Eval clean',
    )]

    if tst_manifest:
        eval_datasets.append((
            AudioToTextDataLayer(
                dataset_dir=args.dataset_dir,
                featurizer_config=featurizer_config_eval,
                manifest_filepath=tst_manifest,
                labels=dataset_vocab,
                batch_size=args.eval_batch_size,
                multi_gpu=multi_gpu,
                pad_to_max=args.pad_to_max
            ),
            args.test_frequency,
            'Test other',
        ))

    model = RNNT(
        feature_config=featurizer_config,
        rnnt=model_definition['rnnt'],
        num_classes=len(ctc_vocab)
    )

    # Resuming: load weights strictly and continue from the saved epoch.
    if args.ckpt is not None:
        print_once("loading model from {}".format(args.ckpt))
        checkpoint = torch.load(args.ckpt, map_location="cpu")
        model.load_state_dict(checkpoint['state_dict'], strict=True)
        args.start_epoch = checkpoint['epoch']
    else:
        args.start_epoch = 0

    # Blank id is the last vocab index.
    loss_fn = RNNTLoss(blank=len(ctc_vocab) - 1)

    N = len(data_layer)
    if sampler_type == 'default':
        args.step_per_epoch = math.ceil(N / (args.batch_size * (1 if not torch.distributed.is_initialized() else torch.distributed.get_world_size())))
    elif sampler_type == 'bucket':
        args.step_per_epoch = int(len(data_layer.sampler) / args.batch_size )

    print_once('-----------------')
    print_once('Have {0} examples to train on.'.format(N))
    print_once('Have {0} steps / (gpu * epoch).'.format(args.step_per_epoch))
    print_once('-----------------')

    # Build the LR schedule by wrapping the constant policy with optional
    # decay and warmup layers (warmup applied outermost).
    constant_lr_policy = lambda _: args.lr
    fn_lr_policy = constant_lr_policy
    if args.lr_decay:
        pre_decay_policy = fn_lr_policy
        fn_lr_policy = lambda s: lr_decay(args.num_epochs * args.step_per_epoch, s, pre_decay_policy(s))
    if args.lr_warmup:
        pre_warmup_policy = fn_lr_policy
        fn_lr_policy = lambda s: lr_warmup(args.lr_warmup, s, pre_warmup_policy(s) )

    if args.optimizer_kind == "novograd":
        optimizer = Novograd(model.parameters(),
                        lr=args.lr,
                        weight_decay=args.weight_decay)
    elif args.optimizer_kind == "adam":
        optimizer = AdamW(model.parameters(),
                        lr=args.lr,
                        weight_decay=args.weight_decay)
    else:
        raise ValueError("invalid optimizer choice: {}".format(args.optimizer_kind))

    # apex amp levels are a GPU feature; refuse them in this IPEX path.
    if args.cuda and optim_level in AmpOptimizations:
        assert False, "not supported in ipex"

    if args.ckpt is not None:
        optimizer.load_state_dict(checkpoint['optimizer'])

    # IPEX rewrites model+optimizer for CPU (bf16 or fp32) and swaps in
    # its optimized LSTM implementation.
    if args.ipex:
        if args.bf16:
            model, optimizer = ipex.optimize(model, dtype=torch.bfloat16, optimizer=optimizer)
            ipex.nn.utils._model_convert.replace_lstm_with_ipex_lstm(model)
        else:
            model, optimizer = ipex.optimize(model, dtype=torch.float32, optimizer=optimizer)
            ipex.nn.utils._model_convert.replace_lstm_with_ipex_lstm(model)

    # CPU DDP: device_ids must be None (no GPU devices to pin).
    if args.world_size > 1:
        device_ids = None
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=device_ids)

    print_once(model)
    print_once("# parameters: {}".format(sum(p.numel() for p in model.parameters())))
    greedy_decoder = RNNTGreedyDecoder(len(ctc_vocab) - 1, model.module if multi_gpu else model)

    # TensorBoard only on rank 0; everyone else gets a no-op logger.
    if args.tb_path and args.local_rank == 0:
        logger = TensorBoardLogger(args.tb_path, model.module if multi_gpu else model, args.histogram)
    else:
        logger = DummyLogger()

    train(
        data_layer=data_layer,
        model=model,
        loss_fn=loss_fn,
        greedy_decoder=greedy_decoder,
        optimizer=optimizer,
        data_transforms=train_transforms,
        labels=ctc_vocab,
        optim_level=optim_level,
        multi_gpu=multi_gpu,
        fn_lr_policy=fn_lr_policy,
        # NOTE: keyword spelled "evalutaion" to match train()'s parameter
        # name; do not "fix" the spelling here without changing train().
        evalutaion=evaluator(model, eval_transforms, loss_fn, greedy_decoder, ctc_vocab, eval_datasets, logger),
        logger=logger,
        args=args)
def main(args):
    """Evaluate a Jasper CTC model on CPU or GPU, with optional TorchScript/amp.

    Seeds RNGs, selects the device, builds the data layer, preprocessor,
    encoder-decoder and greedy CTC decoder from a TOML definition,
    optionally loads a checkpoint, and runs evaluation.

    Args:
        args: parsed CLI namespace; fields used here include seed, steps,
            cpu, cudnn_benchmark, amp, batch_size, model_toml,
            val_manifest, max_duration, pad_to, torch_script, dataset_dir,
            ckpt.
    """
    # Seed every RNG source so evaluation is reproducible.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # presumably a warm-up / measurement-validity constraint on step
    # counts — TODO confirm against the eval() implementation.
    assert (args.steps is None or args.steps > 5)

    if args.cpu:
        device = torch.device('cpu')
    else:
        assert (torch.cuda.is_available())
        device = torch.device('cuda')
        torch.backends.cudnn.benchmark = args.cudnn_benchmark
        print("CUDNN BENCHMARK ", args.cudnn_benchmark)

    # apex amp opt level: O3 when --amp, otherwise O0 (full fp32).
    optim_level = 3 if args.amp else 0

    batch_size = args.batch_size

    jasper_model_definition = toml.load(args.model_toml)
    dataset_vocab = jasper_model_definition['labels']['labels']
    # CTC decoding needs an extra blank symbol appended to the vocab.
    ctc_vocab = add_ctc_labels(dataset_vocab)

    val_manifest = args.val_manifest
    featurizer_config = jasper_model_definition['input_eval']
    featurizer_config["optimization_level"] = optim_level

    if args.max_duration is not None:
        featurizer_config['max_duration'] = args.max_duration

    # TORCHSCRIPT: Cant use mixed types. Using -1 for "max"
    if args.pad_to is not None:
        featurizer_config['pad_to'] = args.pad_to if args.pad_to >= 0 else -1

    # Normalize any "max" value left in the TOML to the -1 sentinel too.
    if featurizer_config['pad_to'] == "max":
        featurizer_config['pad_to'] = -1

    # Masked convolutions are incompatible with TorchScript export, so
    # they are disabled (with a warning) when both are requested.
    args.use_conv_mask = jasper_model_definition['encoder'].get(
        'convmask', True)
    if args.use_conv_mask and args.torch_script:
        print(
            'WARNING: Masked convs currently not supported for TorchScript. Disabling.'
        )
        jasper_model_definition['encoder']['convmask'] = False

    print('model_config')
    print_dict(jasper_model_definition)
    print('feature_config')
    print_dict(featurizer_config)

    data_layer = AudioToTextDataLayer(
        dataset_dir=args.dataset_dir,
        featurizer_config=featurizer_config,
        manifest_filepath=val_manifest,
        labels=dataset_vocab,
        batch_size=batch_size,
        pad_to_max=featurizer_config['pad_to'] == -1,
        shuffle=False,
        multi_gpu=False)

    audio_preprocessor = AudioPreprocessing(**featurizer_config)
    encoderdecoder = JasperEncoderDecoder(
        jasper_model_definition=jasper_model_definition,
        feat_in=1024,
        num_classes=len(ctc_vocab))

    if args.ckpt is not None:
        print("loading model from ", args.ckpt)
        # Load on CPU first; then strip the "audio_preprocessor." prefix
        # from the checkpoint keys so the preprocessor can load its own
        # state, and load the rest into the encoder-decoder (strict=False
        # tolerates the leftover mismatched keys in each call).
        checkpoint = torch.load(args.ckpt, map_location="cpu")
        for k in audio_preprocessor.state_dict().keys():
            checkpoint['state_dict'][k] = checkpoint['state_dict'].pop(
                "audio_preprocessor." + k)
        audio_preprocessor.load_state_dict(checkpoint['state_dict'],
                                           strict=False)
        encoderdecoder.load_state_dict(checkpoint['state_dict'],
                                       strict=False)

    greedy_decoder = GreedyCTCDecoder()

    # print("Number of parameters in encoder: {0}".format(model.jasper_encoder.num_weights()))

    # Report how much data this run will cover.
    N = len(data_layer)
    step_per_epoch = math.ceil(N / args.batch_size)

    print('-----------------')
    if args.steps is None:
        print('Have {0} examples to eval on.'.format(N))
        print('Have {0} steps / (epoch).'.format(step_per_epoch))
    else:
        print('Have {0} examples to eval on.'.format(args.steps *
                                                     args.batch_size))
        print('Have {0} steps / (epoch).'.format(args.steps))
    print('-----------------')

    audio_preprocessor.to(device)
    encoderdecoder.to(device)

    # No optimizer is passed: inference-only amp initialization.
    if args.amp:
        encoderdecoder = amp.initialize(models=encoderdecoder,
                                        opt_level='O' + str(optim_level))

    eval(data_layer=data_layer,
         audio_processor=audio_preprocessor,
         encoderdecoder=encoderdecoder,
         greedy_decoder=greedy_decoder,
         labels=ctc_vocab,
         device=device,
         args=args)