def delta_evaluator():
    """
    Evaluates the delta between past and current data; if a change occurs in a
    state, the corresponding message is recorded against that row.
    :return: True if any change was detected, otherwise False
    """
    global past_reader, current_reader
    past_reader = list(csv.reader(open(file_path, 'r')))
    past_data, current_data = get_data_dict(past_reader), get_data_dict(current_reader)
    delta_dict = get_delta_dict(past_data, current_data)
    for index, msg in delta_dict.items():
        current_reader[index][constants.UPDATED_HIGHLIGHTER_INDEX] = " | ".join(msg)
    return not is_empty(delta_dict)
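# Hypothetical usage sketch (not part of the original module): delta_evaluator()
# returns True when at least one row changed, so a caller could use it to decide
# whether to rewrite the CSV with the updated highlighter column. It reuses the
# same module-level names (csv, file_path, current_reader) the snippet above relies on.
if delta_evaluator():
    with open(file_path, 'w', newline='') as csv_file:
        # Persist the rows that now carry the " | "-joined delta messages
        csv.writer(csv_file).writerows(current_reader)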
def test(netG, epoch, test_dataloader, opt, n_test_batches):
    # Select a random batch index at which to save images
    random_saving_idx = np.random.randint(0, n_test_batches, size=1)
    fix_saving_idx = 2
    test_losses = 0.0

    with torch.no_grad():
        for i in range(n_test_batches):
            data_dict = utils.get_data_dict(test_dataloader)
            batch_dict = utils.get_next_batch(data_dict)
            res = netG.compute_all_losses(batch_dict)
            test_losses += res["loss"].detach()

            if i == fix_saving_idx or i == random_saving_idx:
                gt, pred, time_steps = visualize.make_save_sequence(opt, batch_dict, res)
                if opt.extrap:
                    visualize.save_extrap_images(opt=opt, gt=gt, pred=pred,
                                                 path=opt.test_image_path,
                                                 total_step=100 * (epoch + 1) + i)
                else:
                    visualize.save_interp_images(opt=opt, gt=gt, pred=pred,
                                                 path=opt.test_image_path,
                                                 total_step=100 * (epoch + 1) + i)

    test_losses /= n_test_batches
    print(f"[Test] Epoch [{epoch:03d}/{opt.epoch:03d}]\t"
          f"Loss {test_losses:.4f}\t")
def infer_and_metrics(self):
    test_interp = not self.opt.extrap

    for it in range(self.n_test_batches):
        data_dict = utils.get_data_dict(self.test_dataloader)
        batch_dict = utils.get_next_batch(data_dict, test_interp=test_interp)

        preds, extra_info = self.model.get_reconstruction(
            time_steps_to_predict=batch_dict["tp_to_predict"],
            truth=batch_dict["observed_data"],
            truth_time_steps=batch_dict["observed_tp"],
            mask=batch_dict["observed_mask"],
            out_mask=batch_dict["mask_predicted_data"])

        b, _, c, h, w = batch_dict["data_to_predict"].size()
        selected_time_len = int(batch_dict["mask_predicted_data"][0].sum())
        batch_dict["data_to_predict"] = batch_dict["data_to_predict"][
            batch_dict["mask_predicted_data"].squeeze(-1).byte()].view(
            b, selected_time_len, c, h, w)

        visualize.save_test_images(opt=self.opt, preds=preds, batch_dict=batch_dict,
                                   path=self.opt.result_image_dir,
                                   index=it * self.opt.batch_size)

        if (it + 1) % 10 == 0:
            print(f"step: {it + 1:8d} testing...")

    pred_list = os.listdir(os.path.join(self.opt.result_image_dir, 'pred'))
    gt_list = os.listdir(os.path.join(self.opt.result_image_dir, 'gt'))
    evaluate.Evaluation(self.opt, pred_list, gt_list)
class Global:
    users = utils.get_users_list()
    progress = utils.get_users_progress()
    timeout = {}
    data_dict = utils.get_data_dict()

    @staticmethod
    def md5():
        return utils.get_md5()
def preprocess_data(args):
    args.load_file = None
    args.batch = 64
    args.device = -1

    word_dict, pos_dict, ner_dict, train_data, dev_data, test_data = get_data_dict(args)

    print("save data to %s ..." % args.save_file)
    with open(args.save_file, 'wb') as output:
        torch.save([word_dict, pos_dict, ner_dict, train_data, dev_data, test_data], output)
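# Illustrative only (not from the original repo): the list saved above can be
# restored with torch.load when the preprocessed file is consumed later.
# "data.pt" is a placeholder for whatever args.save_file points to.
import torch

with open('data.pt', 'rb') as f:
    word_dict, pos_dict, ner_dict, train_data, dev_data, test_data = torch.load(f)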
def get(self, topic):
    logging.info(topic)
    topics = utils.get_topics()
    data = utils.get_data_dict(topic)
    print(data)
    params = {
        "topic": topic,
        "topics": topics,
        "data": data,
    }
    self.render('topic.html', **params)
def _preload(self, datadir):
    data_dict = get_data_dict(datadir)
    self.data_dict = {}
    for label in data_dict.keys():
        data = data_dict[label]
        for fname in data['filelist']:
            if label == '正常':  # '正常' = "normal" (defect-free) class
                self.data_dict[fname] = {'is_normal': True}
            else:
                self.data_dict[fname] = {'is_normal': False,
                                         'bboxes': data['anno'][fname]['bboxes']}
    self.image_files = list(self.data_dict.keys())
    print('total images: ', len(self.image_files))
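# Hypothetical sketch of the structure _preload expects get_data_dict(datadir)
# to return, inferred from the loop above (not taken from the original code):
# one entry per class label, each with a file list and, for defect classes,
# per-file bounding-box annotations. File names and the 'defect_a' label are
# placeholders.
example_data_dict = {
    '正常': {  # "normal" class: only a file list, no annotations
        'filelist': ['img_0001.jpg', 'img_0002.jpg'],
    },
    'defect_a': {
        'filelist': ['img_0100.jpg'],
        'anno': {'img_0100.jpg': {'bboxes': [[10, 20, 50, 60]]}},
    },
}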
def __init__(self, config, model, optim, pretrained=False):
    self.config = config
    self.device = config['device']
    self.num_iters = config['trainer']['iters']
    self.image_path = config['dataset']['image_path']
    self.label_path = config['dataset']['label_path']
    self.batch_size_train = config['trainer']['batch_size']
    self.print_every = config['trainer']['print_every']
    self.valid_every = config['trainer']['valid_every']
    self.batch_size_val = config['val']['batch_size']
    self.batch_size_test = config['test']['batch_size']
    self.checkpoint = config['val']['checkpoint']

    self.model = model
    print(self.model)
    if pretrained:
        self.load_checkpoint(self.checkpoint)

    self.iter = 0
    self.optimizer = optim
    self.exp_lr_scheduler = lr_scheduler.StepLR(self.optimizer, step_size=5, gamma=0.1)

    trans = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        transforms.RandomErasing(),
    ])

    # Get data dict
    self.data_dict = get_data_dict(self.image_path, self.label_path)
    print(len(self.data_dict))

    # Split data into train / val / test
    self.train_data_dict, self.val_test = train_test_split(self.data_dict,
                                                            random_state=42, test_size=0.2)
    self.val_data_dict, self.test_data_dict = train_test_split(self.val_test,
                                                               random_state=42, test_size=0.5)

    # Init data loaders
    self.train_data_loader = DataLoader(MyDataset(self.train_data_dict, trans),
                                        batch_size=self.batch_size_train,
                                        shuffle=True, num_workers=4)
    self.val_data_loader = DataLoader(MyDataset(self.val_data_dict),
                                      batch_size=self.batch_size_val,
                                      shuffle=False, num_workers=4)
    self.test_data_loader = DataLoader(MyDataset(self.test_data_dict),
                                       batch_size=self.batch_size_test,
                                       shuffle=False, num_workers=4)

    self.train_losses = []
sir.load_state_dict(checkpoint['model_state_dict'])

writer_dir = 'runs/' + 'real_{}'.format(model_name)

# Check if the writer directory exists; if so, delete it and overwrite
if os.path.isdir(writer_dir):
    rmtree(writer_dir)

writer = SummaryWriter(writer_dir)

mode = 'fake'

if mode == 'real':
    time_unit = 0.25
    area = 'US'
    data_prelock = get_data_dict(area, data_dict=countries_dict_prelock, time_unit=time_unit,
                                 skip_every=0, cut_off=1.5e-3,
                                 populations=selected_countries_populations)

    # If I'm fitting real data, I only fit Infected.
    # I also know the initial condition of I, so I can force it.
    susceptible_weight = 0.
    infected_weight = 1.
    recovered_weight = 0.
    force_init = True
else:
    # Synthetic data
    exact_i_0 = 0.25
    exact_r_0 = 0.15
    exact_beta = 0.2
    exact_gamma = 0.5
    # exact_i_0 = 0.5
    # exact_r_0 = 0.2
spark.conf.set("spark.sql.execution.arrow.pyspark.enabled", "true")

t_wait = 0.005  # waiting time between requests, to potentially avoid being detected as an attacker...
data_total = []

# Fields extracted
ks_data = ['identifier', 'timestamp', 'authors', 'orci_authors', 'typology']  # ,'title','subject_list']

# Initial data download
url_bielfeld = 'http://pub.uni-bielefeld.de/oai?verb=ListRecords&metadataPrefix=oai_datacite'
r = requests.get(url_bielfeld)
o = xmltodict.parse(r.content)
data = o['OAI-PMH']['ListRecords']['record']
r_token = o['OAI-PMH']['ListRecords']['resumptionToken']['#text']
tmp_data = [utils.get_data_dict(d, ks_data) for d in data]

df = None

# Loop over all the data as long as a resumption token is found...
while True:
    url_bielfeld = 'https://pub.uni-bielefeld.de/oai?verb=ListRecords&resumptionToken=' + r_token
    r = requests.get(url_bielfeld)
    o = xmltodict.parse(r.content)
    data = o['OAI-PMH']['ListRecords']['record']
    tmp_data += [utils.get_data_dict(d, ks_data) for d in data]
    if not o['OAI-PMH']['ListRecords']['resumptionToken'].get('#text', False):
        print('********** data loaded **********')
        break
sir.load_state_dict(checkpoint['model_state_dict'])

writer_dir = 'runs/' + 'fitting_{}'.format(model_name)

# Check if the writer directory exists; if so, delete it and overwrite
if os.path.isdir(writer_dir):
    rmtree(writer_dir)

writer = SummaryWriter(writer_dir)

if mode == 'real':
    area = 'Italy'
    time_unit = 0.25
    cut_off = 1e-1

    # Real data, pre-lockdown
    data_prelock = get_data_dict(area, data_dict=countries_dict_prelock, time_unit=time_unit,
                                 skip_every=1, cut_off=cut_off,
                                 populations=selected_countries_populations,
                                 rescaling=selected_countries_rescaling)

    # Real data, post-lockdown
    data_postlock = get_data_dict(area, data_dict=countries_dict_postlock, time_unit=time_unit,
                                  skip_every=1, cut_off=0.,
                                  populations=selected_countries_populations,
                                  rescaling=selected_countries_rescaling)

    susceptible_weight = 1.
    recovered_weight = 1.
    infected_weight = 1.
    force_init = False
else:
    # Synthetic data
    exact_i_0 = 0.25
    exact_r_0 = 0.15
    exact_beta = 0.2
    exact_gamma = 0.5
if args.debug:
    args.train_file = "data/debug_data/baidu.debug.json"
    args.dev_file = "data/debug_data/sogou.debug.json"

if args.seed < 0:
    seed = time.time() % 10000
else:
    seed = args.seed
print("Random Seed: %d" % seed)
torch.manual_seed(int(seed))

if args.device >= 0:
    torch.cuda.set_device(args.device)

word_dict, pos_dict, ner_dict, train_data, dev_data, test_data = utils.get_data_dict(args)

model = DocumentReaderQA(word_dict, args, [pos_dict, ner_dict],
                         [args.pos_vec_size, args.ner_vec_size])

model_folder, model_prefix = utils.get_folder_prefix(args, model)

if args.device >= 0:
    model.cuda(args.device)

if args.word_vectors != 'random':
    model.embedding.load_pretrained_vectors(args.word_vectors, binary=True,
                                            normalize=args.word_normalize)

params = list()
for name, param in model.named_parameters():
    print(name, param.size())
    params.append(param)
# Here I compare the solution provided by SciPy with real data
t_final = 20
time_unit = 0.25
area = 'Italy'

scaled = True
reducing_population = True
multiplication_factor = 1

# Both datasets have the shape of a multidimensional array [S(t), I(t), R(t)]
data_prelock = get_data_dict(area=area, data_dict=countries_dict_prelock, time_unit=time_unit,
                             skip_every=0, cut_off=1.5e-3, scaled=scaled,
                             populations=selected_countries_populations,
                             rescaling=selected_countries_rescaling,
                             reducing_population=reducing_population)
data_postlock = get_data_dict(area=area, data_dict=countries_dict_postlock, time_unit=time_unit,
                              skip_every=1, cut_off=0., scaled=scaled,
                              populations=selected_countries_populations,
                              rescaling=selected_countries_rescaling,
                              reducing_population=reducing_population)

recovered_prelock = np.array([traj[2] for traj in list(data_prelock.values())])
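# Illustrative only: the snippet's own comment states each trajectory has the
# layout [S(t), I(t), R(t)] and recovered_prelock takes index 2, so (assuming the
# same layout) the other compartments could be extracted the same way.
susceptible_prelock = np.array([traj[0] for traj in list(data_prelock.values())])
infected_prelock = np.array([traj[1] for traj in list(data_prelock.values())])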
def train(opt, netG, loader_objs, device):
    # Optimizer
    optimizer_netG = optim.Adamax(netG.parameters(), lr=opt.lr)

    # Discriminator
    netD_img, netD_seq, optimizer_netD = create_netD(opt, device)

    train_dataloader = loader_objs['train_dataloader']
    test_dataloader = loader_objs['test_dataloader']
    n_train_batches = loader_objs['n_train_batches']
    n_test_batches = loader_objs['n_test_batches']
    total_step = 0
    start_time = time.time()

    for epoch in range(opt.epoch):
        utils.update_learning_rate(optimizer_netG, decay_rate=0.99, lowest=opt.lr / 10)
        utils.update_learning_rate(optimizer_netD, decay_rate=0.99, lowest=opt.lr / 10)

        for it in range(n_train_batches):
            data_dict = utils.get_data_dict(train_dataloader)
            batch_dict = utils.get_next_batch(data_dict)

            res = netG.compute_all_losses(batch_dict)
            loss_netG = res["loss"]

            # Compute adversarial loss
            real = batch_dict["data_to_predict"]
            fake = res["pred_y"]
            input_real = batch_dict["observed_data"]

            # Filter out mask
            if opt.irregular:
                b, _, c, h, w = real.size()
                observed_mask = batch_dict["observed_mask"]
                mask_predicted_data = batch_dict["mask_predicted_data"]
                selected_timesteps = int(observed_mask[0].sum())
                input_real = input_real[observed_mask.squeeze(-1).byte(), ...].view(b, selected_timesteps, c, h, w)
                real = real[mask_predicted_data.squeeze(-1).byte(), ...].view(b, selected_timesteps, c, h, w)

            loss_netD = opt.lamb_adv * netD_seq.netD_adv_loss(real, fake, input_real)
            loss_netD += opt.lamb_adv * netD_img.netD_adv_loss(real, fake, None)

            loss_adv_netG = opt.lamb_adv * netD_seq.netG_adv_loss(fake, input_real)
            loss_adv_netG += opt.lamb_adv * netD_img.netG_adv_loss(fake, None)
            loss_netG += loss_adv_netG

            # Train D
            optimizer_netD.zero_grad()
            loss_netD.backward()
            optimizer_netD.step()

            # Train G
            optimizer_netG.zero_grad()
            loss_netG.backward()
            optimizer_netG.step()

            if (total_step + 1) % opt.log_print_freq == 0 or total_step == 0:
                et = time.time() - start_time
                et = str(datetime.timedelta(seconds=et))[:-7]
                log = f"Elapsed [{et}] Epoch [{epoch:03d}/{opt.epoch:03d}]\t" \
                      f"Iterations [{(total_step + 1):6d}] \t" \
                      f"Mse [{res['loss'].item():.4f}]\t" \
                      f"Adv_G [{loss_adv_netG.item():.4f}]\t" \
                      f"Adv_D [{loss_netD.item():.4f}]"
                print(log)

            if (total_step + 1) % opt.ckpt_save_freq == 0 or (epoch + 1 == opt.epoch and it + 1 == n_train_batches) or total_step == 0:
                utils.save_checkpoint(netG, os.path.join(opt.checkpoint_dir, f"ckpt_{(total_step + 1):08d}.pth"))

            if (total_step + 1) % opt.image_print_freq == 0 or total_step == 0:
                gt, pred, time_steps = visualize.make_save_sequence(opt, batch_dict, res)
                if opt.extrap:
                    visualize.save_extrap_images(opt=opt, gt=gt, pred=pred,
                                                 path=opt.train_image_path, total_step=total_step)
                else:
                    visualize.save_interp_images(opt=opt, gt=gt, pred=pred,
                                                 path=opt.train_image_path, total_step=total_step)

            total_step += 1

        # Test
        if (epoch + 1) % 100 == 0:
            test(netG, epoch, test_dataloader, opt, n_test_batches)
# Loop over all the data as long as a resumption token is found...
while True:
    url_bielfeld = 'https://pub.uni-bielefeld.de/oai?verb=ListRecords&resumptionToken=' + r_token
    r = requests.get(url_bielfeld)
    o = xmltodict.parse(r.content)
    data = o['OAI-PMH']['ListRecords']['record']

    if not o['OAI-PMH']['ListRecords']['resumptionToken'].get('#text', False):
        print('********** data loaded **********')
        break

    r_token = o['OAI-PMH']['ListRecords']['resumptionToken']['#text']
    data_total.extend(data)

    if len(data_total) % 1000 == 0:
        print(len(data_total))

    time.sleep(t_wait)

# Process data
ks_data = ['identifier', 'timestamp', 'authors', 'orci_authors', 'title', 'subject_list', 'typology']
processed_data = [utils.get_data_dict(d, ks_data) for d in tqdm.tqdm(data_total)]

# Build DataFrame
df = pd.DataFrame({k: [d[k] for d in processed_data] for k in ks_data})
df['year'] = [int(s[:4]) for s in df['timestamp'].to_list()]
df['typology'] = [t[0] for t in df['typology'].to_list()]
df['authors'] = [json.dumps(a) for a in df['authors']]
df['orci_authors'] = [json.dumps(a) for a in df['orci_authors']]

# Save to data/df.csv
df.to_csv('data/df.csv')