def delta_evaluator():
    """
    Evaluates the delta based on past and current data, if the changes occur in each state
    corresponding message is added against them
    :return:
    """
    global past_reader, current_reader
    with open(file_path, 'r') as past_file:
        past_reader = list(csv.reader(past_file))
    past_data, current_data = get_data_dict(past_reader), get_data_dict(current_reader)
    delta_dict = get_delta_dict(past_data, current_data)
    for index, msg in delta_dict.items():
        current_reader[index][
            constants.UPDATED_HIGHLIGHTER_INDEX] = " | ".join(msg)

    return not is_empty(delta_dict)
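A minimal, hypothetical sketch of the structure this loop consumes: get_delta_dict is assumed to map a row index in current_reader to a list of change messages, which are then joined into a single highlighter cell. The constant, rows, and messages below are invented for illustration.

UPDATED_HIGHLIGHTER_INDEX = 5  # stand-in for constants.UPDATED_HIGHLIGHTER_INDEX

current_reader = [["CA", "open", "", "", "", ""],
                  ["NY", "closed", "", "", "", ""]]
delta_dict = {0: ["status changed: closed -> open"],
              1: ["owner changed", "status changed: open -> closed"]}

for index, msg in delta_dict.items():
    current_reader[index][UPDATED_HIGHLIGHTER_INDEX] = " | ".join(msg)

print(current_reader[1][UPDATED_HIGHLIGHTER_INDEX])
# owner changed | status changed: open -> closed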
Example #2
def test(netG, epoch, test_dataloader, opt, n_test_batches):
    
    # Select one fixed and one random batch index at which to save images
    random_saving_idx = np.random.randint(0, n_test_batches)
    fix_saving_idx = 2
    test_losses = 0.0
    
    with torch.no_grad():
        for i in range(n_test_batches):
            data_dict = utils.get_data_dict(test_dataloader)
            batch_dict = utils.get_next_batch(data_dict)

            res = netG.compute_all_losses(batch_dict)
            test_losses += res["loss"].detach()

            if i == fix_saving_idx or i == random_saving_idx:
    
                gt, pred, time_steps = visualize.make_save_sequence(opt, batch_dict, res)

                if opt.extrap:
                    visualize.save_extrap_images(opt=opt, gt=gt, pred=pred, path=opt.test_image_path, total_step=100 * (epoch + 1) + i)
                else:
                    visualize.save_interp_images(opt=opt, gt=gt, pred=pred, path=opt.test_image_path, total_step=100 * (epoch + 1) + i)
                    
        test_losses /= n_test_batches

    print(f"[Test] Epoch [{epoch:03d}/{opt.epoch:03d}]\t" f"Loss {test_losses:.4f}\t")
Example #3
    def infer_and_metrics(self):

        test_interp = not self.opt.extrap

        for it in range(self.n_test_batches):
            data_dict = utils.get_data_dict(self.test_dataloader)
            batch_dict = utils.get_next_batch(data_dict,
                                              test_interp=test_interp)

            preds, extra_info = self.model.get_reconstruction(
                time_steps_to_predict=batch_dict["tp_to_predict"],
                truth=batch_dict["observed_data"],
                truth_time_steps=batch_dict["observed_tp"],
                mask=batch_dict["observed_mask"],
                out_mask=batch_dict["mask_predicted_data"])

            b, _, c, h, w = batch_dict["data_to_predict"].size()
            selected_time_len = int(batch_dict["mask_predicted_data"][0].sum())
            batch_dict["data_to_predict"] = batch_dict["data_to_predict"][
                batch_dict["mask_predicted_data"].squeeze(-1).byte()].view(
                    b, selected_time_len, c, h, w)

            visualize.save_test_images(opt=self.opt,
                                       preds=preds,
                                       batch_dict=batch_dict,
                                       path=self.opt.result_image_dir,
                                       index=it * self.opt.batch_size)

            if (it + 1) % 10 == 0:
                print(f"step: {it + 1:8d} testing...")

        pred_list = os.listdir(os.path.join(self.opt.result_image_dir, 'pred'))
        gt_list = os.listdir(os.path.join(self.opt.result_image_dir, 'gt'))

        evaluate.Evaluation(self.opt, pred_list, gt_list)
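The mask-select-and-reshape step above (index with the squeezed mask, then view back to (b, t, c, h, w)) is the trickiest part of this method. Below is a minimal, self-contained sketch of that pattern with illustrative shapes; all names and sizes are assumptions, not the project's code.

import torch

b, t, c, h, w = 2, 6, 3, 8, 8
frames = torch.randn(b, t, c, h, w)
mask = torch.zeros(b, t, 1)
mask[:, ::2] = 1                        # pretend every other time step is kept

selected_time_len = int(mask[0].sum())  # 3 time steps survive per sample
selected = frames[mask.squeeze(-1).bool()].view(b, selected_time_len, c, h, w)
print(selected.shape)                   # torch.Size([2, 3, 3, 8, 8])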
Example #4
class Global:
    users = utils.get_users_list()
    progress = utils.get_users_progress()
    timeout = {}
    data_dict = utils.get_data_dict()

    @staticmethod
    def md5():
        return utils.get_md5()
Example #5
def preprocess_data(args):
    args.load_file = None
    args.batch = 64
    args.device = -1
    word_dict, pos_dict, ner_dict, train_data, dev_data, test_data = get_data_dict(args)
    print("save data to %s ..." % args.save_file)
    with open(args.save_file, 'wb') as output:
        torch.save([word_dict, pos_dict, ner_dict, train_data, dev_data, test_data], output)
Example #6
    def get(self, topic):
        logging.info(topic)
        topics = utils.get_topics()
        data = utils.get_data_dict(topic)
        print(data)
        params = {
            "topic": topic,
            "topics": topics,
            "data": data,
        }
        self.render('topic.html', **params)
Example #7
    def _preload(self, datadir):
        data_dict = get_data_dict(datadir)
        self.data_dict = {}
        for label in data_dict.keys():
            data = data_dict[label]
            for fname in data['filelist']:
                if label == '正常':  # '正常' means "normal" (defect-free)
                    self.data_dict[fname] = {'is_normal': True}
                else:
                    self.data_dict[fname] = {'is_normal': False,
                                             'bboxes': data['anno'][fname]['bboxes']}
        self.image_files = list(self.data_dict.keys())
        print('total images: ', len(self.image_files))
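For reference, a hypothetical example of the nested structure that get_data_dict(datadir) is assumed to return here: each label maps to a file list, with per-file annotations for non-normal labels. The label 'defect' and all values are invented.

example_data_dict = {
    '正常': {'filelist': ['ok_001.jpg', 'ok_002.jpg']},
    'defect': {'filelist': ['bad_001.jpg'],
               'anno': {'bad_001.jpg': {'bboxes': [[10, 20, 50, 60]]}}},
}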
Example #8
    def __init__(self, config, model, optim, pretrained=False):

        self.config = config

        self.device = config['device']
        self.num_iters = config['trainer']['iters']

        self.image_path = config['dataset']['image_path']
        self.label_path = config['dataset']['label_path']

        self.batch_size_train = config['trainer']['batch_size']
        self.print_every = config['trainer']['print_every']
        self.valid_every = config['trainer']['valid_every']

        self.batch_size_val = config['val']['batch_size']
        self.batch_size_test = config['test']['batch_size']

        self.checkpoint = config['val']['checkpoint']
        self.model = model
        print(self.model)
        if pretrained:
            self.load_checkpoint(self.checkpoint)

        self.iter = 0

        self.optimizer = optim
        self.exp_lr_scheduler = lr_scheduler.StepLR(self.optimizer, step_size=5, gamma=0.1) 
        
        trans = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
            transforms.RandomErasing(),
        ])
        # Get data dict
        self.data_dict = get_data_dict(self.image_path, self.label_path)
        print(len(self.data_dict))

        # Split data into train / val / test (80% / 10% / 10%)
        self.train_data_dict, self.val_test = train_test_split(self.data_dict, random_state=42, test_size=0.2)
        self.val_data_dict, self.test_data_dict = train_test_split(self.val_test, random_state=42, test_size=0.5)

        # Init data loaders
        self.train_data_loader = DataLoader(MyDataset(self.train_data_dict, trans), batch_size=self.batch_size_train, shuffle=True, num_workers=4)
        self.val_data_loader = DataLoader(MyDataset(self.val_data_dict), batch_size=self.batch_size_val, shuffle=False, num_workers=4)
        self.test_data_loader = DataLoader(MyDataset(self.test_data_dict), batch_size=self.batch_size_test, shuffle=False, num_workers=4)

        self.train_losses = []
Example #9
    sir.load_state_dict(checkpoint['model_state_dict'])

    writer_dir = 'runs/' + 'real_{}'.format(model_name)

    # If the writer directory already exists, delete it so it can be overwritten
    if os.path.isdir(writer_dir):
        rmtree(writer_dir)

    writer = SummaryWriter(writer_dir)

    mode = 'fake'

    if mode == 'real':
        time_unit = 0.25
        area = 'US'
        data_prelock = get_data_dict(area, data_dict=countries_dict_prelock, time_unit=time_unit, skip_every=0,
                                     cut_off=1.5e-3, populations=selected_countries_populations)

        # If I'm fitting real data, I only fit Infected.
        # I also know the initial condition of I, so I can force it.
        susceptible_weight = 0.
        infected_weight = 1.
        recovered_weight = 0.
        force_init = True
    else:
        # Synthetic data
        exact_i_0 = 0.25
        exact_r_0 = 0.15
        exact_beta = 0.2
        exact_gamma = 0.5
        # exact_i_0 = 0.5
        # exact_r_0 = 0.2
Example #10
spark.conf.set("spark.sql.execution.arrow.pyspark.enabled", "true")

t_wait = 0.005  # waiting time between requests, to reduce the chance of being flagged as an attacker
data_total = []

# fields extracted
ks_data = ['identifier','timestamp','authors','orci_authors','typology'] #,'title','subject_list']

# Initial data download
url_bielfeld = 'http://pub.uni-bielefeld.de/oai?verb=ListRecords&metadataPrefix=oai_datacite'
r = requests.get(url_bielfeld)
o = xmltodict.parse(r.content)

data = o['OAI-PMH']['ListRecords']['record'] 
r_token = o['OAI-PMH']['ListRecords']['resumptionToken']['#text']
tmp_data = [utils.get_data_dict(d, ks_data) for d in data]

df = None

# Loop over all the data as long as a resumption token is found
while True:
    url_bielfeld = 'https://pub.uni-bielefeld.de/oai?verb=ListRecords&resumptionToken='+r_token
    r = requests.get(url_bielfeld)
    o = xmltodict.parse(r.content)
    data = o['OAI-PMH']['ListRecords']['record'] 
    tmp_data += [utils.get_data_dict(d, ks_data) for d in data]
    
    if not o['OAI-PMH']['ListRecords']['resumptionToken'].get('#text', False):
        print('********** data loaded **********')
        break
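A minimal, hypothetical sketch of what a field extractor like utils.get_data_dict(record, keys) might do here: pick the listed keys out of one parsed OAI-PMH record, defaulting to None for missing fields. The real helper may unpack nested DataCite metadata; this only illustrates the shape of the call.

def extract_fields(record, keys):
    return {k: record.get(k) for k in keys}

sample_record = {'identifier': 'oai:pub.uni-bielefeld.de:123',
                 'timestamp': '2020-01-01T00:00:00Z'}
print(extract_fields(sample_record, ['identifier', 'timestamp', 'authors']))
# {'identifier': 'oai:pub.uni-bielefeld.de:123', 'timestamp': '2020-01-01T00:00:00Z', 'authors': None}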
Example #11
    sir.load_state_dict(checkpoint['model_state_dict'])

    writer_dir = 'runs/' + 'fitting_{}'.format(model_name)

    # If the writer directory already exists, delete it so it can be overwritten
    if os.path.isdir(writer_dir):
        rmtree(writer_dir)
    writer = SummaryWriter(writer_dir)

    if mode == 'real':
        area = 'Italy'
        time_unit = 0.25
        cut_off = 1e-1
        # Real data prelockdown
        data_prelock = get_data_dict(area, data_dict=countries_dict_prelock, time_unit=time_unit,
                                     skip_every=1, cut_off=cut_off, populations=selected_countries_populations,
                                     rescaling=selected_countries_rescaling)
        # Real data postlockdown
        data_postlock = get_data_dict(area, data_dict=countries_dict_postlock, time_unit=time_unit,
                                      skip_every=1, cut_off=0., populations=selected_countries_populations,
                                      rescaling=selected_countries_rescaling)
        susceptible_weight = 1.
        recovered_weight = 1.
        infected_weight = 1.
        force_init = False
    else:
        # Synthetic data
        exact_i_0 = 0.25
        exact_r_0 = 0.15
        exact_beta = 0.2
        exact_gamma = 0.5
Example #12
if args.debug:
    args.train_file = "data/debug_data/baidu.debug.json"
    args.dev_file = "data/debug_data/sogou.debug.json"

if args.seed < 0:
    seed = time.time() % 10000
else:
    seed = args.seed
print("Random Seed: %d" % seed)
torch.manual_seed(int(seed))

if args.device >= 0:
    torch.cuda.set_device(args.device)

word_dict, pos_dict, ner_dict, train_data, dev_data, test_data = utils.get_data_dict(args)

model = DocumentReaderQA(word_dict, args, [pos_dict, ner_dict], [args.pos_vec_size, args.ner_vec_size])

model_folder, model_prefix = utils.get_folder_prefix(args, model)

if args.device >= 0:
    model.cuda(args.device)

if args.word_vectors != 'random':
    model.embedding.load_pretrained_vectors(args.word_vectors, binary=True, normalize=args.word_normalize)

params = list()
for name, param in model.named_parameters():
    print(name, param.size())
    params.append(param)
Example #13
# Here I compare the solution provided by SciPy with the real data

t_final = 20
time_unit = 0.25
area = 'Italy'
scaled = True
reducing_population = True

multiplication_factor = 1

# Both datasets have the shape of a multidimensional array [S(t), I(t), R(t)]
data_prelock = get_data_dict(area=area,
                             data_dict=countries_dict_prelock,
                             time_unit=time_unit,
                             skip_every=0,
                             cut_off=1.5e-3,
                             scaled=scaled,
                             populations=selected_countries_populations,
                             rescaling=selected_countries_rescaling,
                             reducing_population=reducing_population)
data_postlock = get_data_dict(area=area,
                              data_dict=countries_dict_postlock,
                              time_unit=time_unit,
                              skip_every=1,
                              cut_off=0.,
                              scaled=scaled,
                              populations=selected_countries_populations,
                              rescaling=selected_countries_rescaling,
                              reducing_population=reducing_population)

recovered_prelock = np.array([traj[2] for traj in list(data_prelock.values())])
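A hypothetical illustration of the return shape these calls rely on: get_data_dict is assumed to yield a dict whose values are [S(t), I(t), R(t)] triples (keyed by time point here, which is an assumption), so index 2 of each value is the recovered compartment. All numbers are made up.

import numpy as np

fake_data_prelock = {0.00: [0.990, 0.008, 0.002],
                     0.25: [0.985, 0.011, 0.004]}
recovered = np.array([traj[2] for traj in fake_data_prelock.values()])
print(recovered)   # [0.002 0.004]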
Example #14
def train(opt, netG, loader_objs, device):
    # Optimizer
    optimizer_netG = optim.Adamax(netG.parameters(), lr=opt.lr)
    
    # Discriminator
    netD_img, netD_seq, optimizer_netD = create_netD(opt, device)
    
    train_dataloader = loader_objs['train_dataloader']
    test_dataloader = loader_objs['test_dataloader']
    n_train_batches = loader_objs['n_train_batches']
    n_test_batches = loader_objs['n_test_batches']
    total_step = 0
    start_time = time.time()
    
    for epoch in range(opt.epoch):
        
        utils.update_learning_rate(optimizer_netG, decay_rate=0.99, lowest=opt.lr / 10)
        utils.update_learning_rate(optimizer_netD, decay_rate=0.99, lowest=opt.lr / 10)
        
        for it in range(n_train_batches):
            
            data_dict = utils.get_data_dict(train_dataloader)
            batch_dict = utils.get_next_batch(data_dict)
            
            res = netG.compute_all_losses(batch_dict)
            loss_netG = res["loss"]
            
            # Compute Adversarial Loss
            real = batch_dict["data_to_predict"]
            fake = res["pred_y"]
            input_real = batch_dict["observed_data"]

            # Keep only the frames selected by the observation masks (irregular sampling)
            if opt.irregular:
                b, _, c, h, w = real.size()
                observed_mask = batch_dict["observed_mask"]
                mask_predicted_data = batch_dict["mask_predicted_data"]

                selected_timesteps = int(observed_mask[0].sum())
                input_real = input_real[observed_mask.squeeze(-1).byte(), ...].view(b, selected_timesteps, c, h, w)
                real = real[mask_predicted_data.squeeze(-1).byte(), ...].view(b, selected_timesteps, c, h, w)

            loss_netD = opt.lamb_adv * netD_seq.netD_adv_loss(real, fake, input_real)
            loss_netD += opt.lamb_adv * netD_img.netD_adv_loss(real, fake, None)

            loss_adv_netG = opt.lamb_adv * netD_seq.netG_adv_loss(fake, input_real)
            loss_adv_netG += opt.lamb_adv * netD_img.netG_adv_loss(fake, None)
            loss_netG += loss_adv_netG

            # Train D
            optimizer_netD.zero_grad()
            loss_netD.backward()
            optimizer_netD.step()
            
            # Train G
            optimizer_netG.zero_grad()
            loss_netG.backward()
            optimizer_netG.step()
            
            if (total_step + 1) % opt.log_print_freq == 0 or total_step == 0:
                et = time.time() - start_time
                et = str(datetime.timedelta(seconds=et))[:-7]
                log = f"Elapsed [{et}] Epoch [{epoch:03d}/{opt.epoch:03d}]\t"\
                        f"Iterations [{(total_step + 1):6d}] \t"\
                        f"Mse [{res['loss'].item():.4f}]\t"\
                        f"Adv_G [{loss_adv_netG.item():.4f}]\t"\
                        f"Adv_D [{loss_netD.item():.4f}]"
                
                print(log)

            if (total_step + 1) % opt.ckpt_save_freq == 0 or (epoch + 1 == opt.epoch and it + 1 == n_train_batches) or total_step == 0:
                utils.save_checkpoint(netG, os.path.join(opt.checkpoint_dir, f"ckpt_{(total_step + 1):08d}.pth"))
            
            if (total_step + 1) % opt.image_print_freq == 0 or total_step == 0:
                
                gt, pred, time_steps = visualize.make_save_sequence(opt, batch_dict, res)

                if opt.extrap:
                    visualize.save_extrap_images(opt=opt, gt=gt, pred=pred, path=opt.train_image_path, total_step=total_step)
                else:
                    visualize.save_interp_images(opt=opt, gt=gt, pred=pred, path=opt.train_image_path, total_step=total_step)
            
            total_step += 1
            
        # Test
        if (epoch + 1) % 100 == 0:
            test(netG, epoch, test_dataloader, opt, n_test_batches)
# Continuation of the OAI-PMH harvesting script (Example #10): fetch pages
# until no resumption token is returned.
while True:
    url_bielfeld = 'https://pub.uni-bielefeld.de/oai?verb=ListRecords&resumptionToken=' + r_token
    r = requests.get(url_bielfeld)
    o = xmltodict.parse(r.content)
    data = o['OAI-PMH']['ListRecords']['record']
    if not o['OAI-PMH']['ListRecords']['resumptionToken'].get('#text', False):
        print('********** data loaded **********')
        break
    r_token = o['OAI-PMH']['ListRecords']['resumptionToken']['#text']
    data_total.extend(data)
    if len(data_total) % 1000 == 0:
        print(len(data_total))
    time.sleep(t_wait)


# Process data
ks_data = ['identifier','timestamp','authors','orci_authors','title','subject_list','typology']

processed_data = [utils.get_data_dict(data_total[i], ks_data)
                  for i in tqdm.tqdm(range(len(data_total)))]


# Build Dataframe
df = pd.DataFrame(dict([(k,[d[k] for d in processed_data]) for k in ks_data]))
df['year'] = [int(s[:4]) for s in df['timestamp'].to_list()]
df['typology'] = [t[0] for t in df['typology'].to_list()]
df['authors'] = [json.dumps(a) for a in df['authors']]
df['orci_authors'] = [json.dumps(a) for a in df['orci_authors']]

# Save to data/df.csv
df.to_csv('data/df.csv')
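As a side note, pandas can build the same kind of frame directly from a list of flat dicts, which avoids the column-by-column comprehension; a small sketch with toy records (all values invented):

import json
import pandas as pd

toy_records = [
    {'identifier': 'id-1', 'timestamp': '2020-03-01T00:00:00Z',
     'authors': ['A. Author'], 'orci_authors': [], 'title': 't1',
     'subject_list': [], 'typology': ['article']},
]
toy_df = pd.DataFrame(toy_records)
toy_df['year'] = [int(s[:4]) for s in toy_df['timestamp']]
toy_df['typology'] = [t[0] for t in toy_df['typology']]
toy_df['authors'] = [json.dumps(a) for a in toy_df['authors']]
print(toy_df[['identifier', 'year', 'typology']])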