Example #1
def test(len1, len2, data, op="RMS"):
    criterion = nn.MSELoss()
    # criterion=nn.L1Loss()
    # criterion=nn.CrossEntropyLoss()
    ae = AE(len(data.columns), len1, len2, AE_TYPE)
    if op == "RMS":
        optimizer = optim.RMSprop(ae.parameters(), lr=0.01, weight_decay=0.5)
    elif op == "Adam":
        optimizer = optim.Adam(ae.parameters(), lr=0.1, weight_decay=0.5)
    ae = train(ae, criterion, optimizer, len1, len2, op)

    # Inspect the parameters
    # for name, param in ae.named_parameters():
    #     print(name,param)
    # input()

    d = data.copy()
    for index, epoch in enumerate(data.copy().values):
        # Record the positions of missing values
        mark_null = []
        for i, v in enumerate(epoch):
            if np.isnan(v):
                mark_null.append(i)
                epoch[i] = 0

        epoch = Variable(torch.from_numpy(epoch.astype(np.double)).double())
        ae.double()
        #epoch=torch.from_numpy(epoch.astype(np.double)).double()
        pre = ae(epoch.double())
        d.values[index] = pre.detach().numpy()
    return d
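
The snippet above depends on external `AE`, `train`, and `AE_TYPE` definitions, so it cannot run on its own; the per-row NaN handling it performs can, however, be illustrated in isolation (a minimal sketch, not the full pipeline):

import numpy as np
import torch

# One row with a missing value: record the missing positions, zero-fill,
# then convert to a double tensor, as the loop above does before calling ae().
row = np.array([1.0, np.nan, 3.0])
mark_null = [i for i, v in enumerate(row) if np.isnan(v)]
row[mark_null] = 0.0
tensor_row = torch.from_numpy(row).double()
print(mark_null, tensor_row)  # [1] tensor([1., 0., 3.], dtype=torch.float64)
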
Example #2
def load_wav_to_torch(full_path):
    """
    Loads WAV data into a torch tensor
    """
    sampling_rate, data = read(full_path)
    # https://github.com/pytorch/pytorch/issues/47160#issue-733792677
    return torch.from_numpy(data.copy()).float(), sampling_rate
Example #3
def KD(data):
    data_df = data.copy()
    data_df['min'] = data_df['Low'].rolling(9).min()
    data_df['max'] = data_df['High'].rolling(9).max()
    data_df['RSV'] = (data_df['Close'] - data_df['min']) / \
        (data_df['max'] - data_df['min'])
    data_df = data_df.dropna()
    # Compute K
    # The initial value of K is set to 50
    K_list = [50]
    for num, rsv in enumerate(list(data_df['RSV'])):
        K_yesterday = K_list[num]
        K_today = 2/3 * K_yesterday + 1/3 * rsv
        K_list.append(K_today)
    data_df['K'] = K_list[1:]
    # Compute D
    # The initial value of D is set to 50
    D_list = [50]
    for num, K in enumerate(list(data_df['K'])):
        D_yesterday = D_list[num]
        D_today = 2/3 * D_yesterday + 1/3 * K
        D_list.append(D_today)
    data_df['D'] = D_list[1:]
    use_df = pd.merge(data, data_df[['K', 'D']],
                      left_index=True, right_index=True, how='left')
    return use_df
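
A minimal usage sketch for the KD() helper above, assuming a pandas DataFrame with 'High', 'Low', and 'Close' columns (the price data here is synthetic):

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
close = 100 + rng.normal(0, 1, 60).cumsum()
prices = pd.DataFrame({
    'High': close + rng.uniform(0, 1, 60),
    'Low': close - rng.uniform(0, 1, 60),
    'Close': close,
}, index=pd.date_range('2023-01-01', periods=60))

with_kd = KD(prices)  # adds 'K' and 'D' columns (NaN for the first 8 rows)
print(with_kd[['Close', 'K', 'D']].tail())
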
Example #4
    def __getitem__(self, index):
        data = np.asarray(Image.open(self.data_filenames[index]))
        label = np.asarray(Image.open(self.label_filenames[index]))

        if self.phase == 'train':
            num = random.randint(0, 11)
            flip = num // 4
            degree = num % 4

            if flip == 1:
                data = np.flip(data, 0)
                label = np.flip(label, 0)
            if flip == 2:
                data = np.flip(data, 1)
                label = np.flip(label, 1)

            if degree != 0:
                data = rotate(data, 90 * degree)
                label = rotate(label, 90 * degree)
        elif self.phase == 'test':
            pass

        if self.transform:
            data = self.transform(data.copy())
            label = self.transform(label.copy())

        #return data.half(), label.half()
        return data, label
Example #5
    def __init__(self, data, label, env, shuffle=False):
        """
        Args:
            data : The EEG signal with shape [N, 1, C, T]
            label: The class label of each sample
            env  : The index of the environment where the data was recorded
            shuffle : Whether to shuffle the data on construction
        """
        self.data = data.copy()
        self.label = label.copy()
        self.env = env.copy()

        #self.labelnum, self.envnum = max(self.label)+1, max(self.env)+1
        self.labelnum, self.envnum = len(np.unique(self.label)), len(
            np.unique(self.env))
        print("There are {0} datas".format(len(self.label)))
        """
            iteIndex shows how many pairs in this part of the triplet pairs have already been taken
            triPairLen shows how many triplet pairs there are in this part
            triPairIndex shows the index of the part
        """
        #self.make_triplet_pairs()
        self.dataNums = np.zeros(
            (max(self.env) + 1, max(self.label) + 1)).astype(int)
        self.datalen = 0
        self.dataNumsAccum = []
        self.sub_data_A_ind = []
        self.sub_data_N_ind = []
        self.sub_data_AP_ind = []
        if shuffle:
            self.shuffle()
        self.set_length()

        self.selected = np.zeros(len(self.label))
Example #6
 def __from_dummies__(self, prefix_sep='=', **kwargs):
     """
         Convert encoded columns into original ones
     """
     if 'ext_data' in kwargs:
         data = kwargs['ext_data']
     else:
         data = self.df
     categories = self.cat_clm
     cat_was_num = self.categorical_was_numeric
     out = data.copy()
     for l in categories:
         cols = data.filter(regex="^{}{}".format(l, prefix_sep),
                            axis=1).columns
         labs = [
             cols[i].split(prefix_sep)[-1] for i in range(cols.shape[0])
         ]
         out[l] = pd.Categorical(
             np.array(labs)[np.argmax(data[cols].values, axis=1)])
         out.drop(cols, axis=1, inplace=True)
         if l in cat_was_num.keys():
             out[l] = out[l].astype(cat_was_num[l])
     if 'ext_data' in kwargs:
         return out
     else:
         self.df = out
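
The core idea behind __from_dummies__ can be shown standalone: the original category is recovered by taking the argmax over the prefix-matched dummy columns. A sketch with a hypothetical 'color' column and the default '=' separator:

import numpy as np
import pandas as pd

dummies = pd.DataFrame({'color=red':   [1, 0, 0],
                        'color=blue':  [0, 1, 0],
                        'color=green': [0, 0, 1]})
cols = dummies.filter(regex='^color=', axis=1).columns
labels = [c.split('=')[-1] for c in cols]
recovered = pd.Categorical(np.array(labels)[np.argmax(dummies[cols].values, axis=1)])
print(list(recovered))  # ['red', 'blue', 'green']
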
Example #7
 def transform(self, data):
     data = data.copy().astype('float32')
     data = (data - self.minn) / (self.maxx - self.minn) * 2 - 1
     if self.height * self.height > len(data[0]):
         padding = np.zeros(
             (len(data), self.height * self.height - len(data[0])))
         data = np.concatenate([data, padding], axis=1)
     return data.reshape(-1, 1, self.height, self.height)
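
The same scale-pad-reshape steps shown standalone (assuming height=4 and min/max values that the scaler object above would normally have fitted beforehand): each 10-feature row is scaled to [-1, 1], padded to 16 values, and viewed as a 1x4x4 "image".

import numpy as np

height = 4
data = np.random.rand(8, 10).astype('float32')
minn, maxx = data.min(), data.max()  # normally fitted on the training set
scaled = (data - minn) / (maxx - minn) * 2 - 1
padding = np.zeros((len(scaled), height * height - scaled.shape[1]), dtype='float32')
images = np.concatenate([scaled, padding], axis=1).reshape(-1, 1, height, height)
print(images.shape)  # (8, 1, 4, 4)
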
Example #8
def get_topk_accuracy_quick(net, k, data):
    code_0 = get_code_quick(net, torch.tensor(data))

    code_1 = get_code_quick(net,
                            torch.tensor(transform_images(data.copy(), 90)))

    dist = cdist(code_0, code_1)
    I = dist.argsort(axis=1)
    b = I[:, 0:k] == np.arange(code_1.shape[0]).reshape(
        code_1.shape[0], 1).astype(int)
    return np.sum(np.any(b, axis=1)) / code_1.shape[0]
Example #9
def load_dataset(path, s_label):
    data = pd.read_csv(path)
    # Preprocessing taken from https://www.kaggle.com/islomjon/income-prediction-with-ensembles-of-decision-trees

    # replace missing values with majority class
    data['workclass'] = data['workclass'].replace('?', 'Private')
    data['occupation'] = data['occupation'].replace('?', 'Prof-specialty')
    data['native-country'] = data['native-country'].replace(
        '?', 'United-States')

    # education category
    data.education = data.education.replace([
        'Preschool', '1st-4th', '5th-6th', '7th-8th', '9th', '10th', '11th',
        '12th'
    ], 'left')
    data.education = data.education.replace('HS-grad', 'school')
    data.education = data.education.replace(
        ['Assoc-voc', 'Assoc-acdm', 'Prof-school', 'Some-college'], 'higher')
    data.education = data.education.replace('Bachelors', 'undergrad')
    data.education = data.education.replace('Masters', 'grad')
    data.education = data.education.replace('Doctorate', 'doc')

    # marital status
    data['marital-status'] = data['marital-status'].replace(
        ['Married-civ-spouse', 'Married-AF-spouse'], 'married')
    data['marital-status'] = data['marital-status'].replace([
        'Never-married', 'Divorced', 'Separated', 'Widowed',
        'Married-spouse-absent'
    ], 'not-married')

    # income
    data.income = data.income.replace('<=50K', 0)
    data.income = data.income.replace('>50K', 1)

    # sex
    data.gender = data.gender.replace('Male', 0)
    data.gender = data.gender.replace('Female', 1)

    # encode categorical values
    data1 = data.copy()
    data1 = pd.get_dummies(data1)
    data1 = data1.drop(['income', s_label], axis=1)

    X = StandardScaler().fit(data1).transform(data1)
    y = data['income'].values
    s = data[s_label].values

    return X, y, s
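
A hedged usage sketch, assuming an Adult-census style CSV (with the 'income' and 'gender' columns recoded above) and 'gender' as the sensitive attribute; the file name is hypothetical:

import numpy as np

X, y, s = load_dataset('adult.csv', s_label='gender')
print(X.shape)                              # scaled, one-hot encoded features
print(y.mean())                             # fraction of >50K incomes
print(np.unique(s, return_counts=True))     # sensitive-attribute codes and their counts
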
Example #10
 def rotate_point_cloud(self, data):
     """ Randomly rotate the point clouds to augument the dataset
         rotation is per shape based along up direction
         Input:
           Nx3 array, original batch of point clouds
         Return:
           Nx3 array, rotated batch of point clouds
     """
     rotation_angle = np.random.uniform() * 2 * np.pi
     cosval = np.cos(rotation_angle)
     sinval = np.sin(rotation_angle)
     rotation_matrix = np.array([[cosval, 0, sinval], [0, 1, 0],
                                 [-sinval, 0, cosval]])
     shape_pc = data.copy()
     rotated_data = np.dot(shape_pc, rotation_matrix)
     return rotated_data
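
The same up-axis (Y) rotation shown standalone on a synthetic Nx3 point cloud; since the rotation matrix is orthonormal, point norms are preserved, which gives an easy sanity check:

import numpy as np

points = np.random.rand(1024, 3).astype(np.float32)
angle = np.random.uniform() * 2 * np.pi
c, s = np.cos(angle), np.sin(angle)
rot_y = np.array([[c, 0, s],
                  [0, 1, 0],
                  [-s, 0, c]])
rotated = points @ rot_y

assert np.allclose(np.linalg.norm(points, axis=1),
                   np.linalg.norm(rotated, axis=1), atol=1e-5)
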
Example #11
def synchGenerator():
    layer_num = 0
    all_data = []
    for param in netG.parameters():
        all_data.append([])
        data = param.data.numpy().copy()
        sendbfr_prev = data.copy()
        recvbfr = None

        data = comm.gather(data, root=0)

        if (rank == 0):
            #print(sendbfr_prev.dtype)
            new_weights = np.zeros(sendbfr_prev.shape, sendbfr_prev.dtype)
            #print(new_weights.dtype)
            for i in range(1, size):
                new_weights += data[i] - sendbfr_prev
                #if (layer_num == 2):
                #    print(sendbfr_prev - data[i])
            new_weights /= (size - 1)
            #if (layer_num == 2):
            #    print(new_weights)
            new_weights += sendbfr_prev

            param.data = torch.from_numpy(new_weights)

            #print("Layer " + str(layer_num) + " has finished aggregating weights")
        else:
            pass
            #print("Node rank " + str(rank) + " has sent generator differences for layer " + str(layer_num))
        layer_num += 1

    layer_num = 0

    for param in netG.parameters():
        if (rank == 0):
            data = param.data.numpy().copy()
        else:
            data = None

        data = comm.bcast(data, root=0)
        if (rank != 0):
            param.data = torch.from_numpy(data)
            #print("Node rank " + str(rank) + " has synched generator layer " + str(layer_num))
        layer_num += 1
Example #12
    def __call__(self, data):
        ''' Calls the transformation.

        Args:
            data (dictionary): data dictionary
        '''
        points = data[None]
        occ = data['occ']

        data_out = data.copy()
        if isinstance(self.N, int):
            idx = np.random.randint(points.shape[0], size=self.N)
            data_out.update({
                None: points[idx, :],
                'occ': occ[idx],
            })
        else:
            Nt_out, Nt_in = self.N
            occ_binary = (occ >= 0.5)
            points0 = points[~occ_binary]
            points1 = points[occ_binary]

            idx0 = np.random.randint(points0.shape[0], size=Nt_out)
            idx1 = np.random.randint(points1.shape[0], size=Nt_in)

            points0 = points0[idx0, :]
            points1 = points1[idx1, :]
            points = np.concatenate([points0, points1], axis=0)

            occ0 = np.zeros(Nt_out, dtype=np.float32)
            occ1 = np.ones(Nt_in, dtype=np.float32)
            occ = np.concatenate([occ0, occ1], axis=0)

            volume = occ_binary.sum() / len(occ_binary)
            volume = volume.astype(np.float32)

            data_out.update({
                None: points,
                'occ': occ,
                'volume': volume,
            })
        return data_out
Example #13
def get_topk_accuracy(net, k, data):
    code_0 = get_code(
        net,
        torch.utils.data.DataLoader(torch.utils.data.TensorDataset(
            torch.tensor(data)),
                                    batch_size=run.batch_size * 4,
                                    drop_last=False))

    code_1 = get_code(
        net,
        torch.utils.data.DataLoader(torch.utils.data.TensorDataset(
            torch.tensor(transform_images(data.copy(), 90))),
                                    batch_size=run.batch_size * 4,
                                    drop_last=False))

    dist = cdist(code_0, code_1)
    I = dist.argsort(axis=1)
    b = I[:, 0:k] == np.arange(code_1.shape[0]).reshape(
        code_1.shape[0], 1).astype(int)
    return np.sum(np.any(b, axis=1)) / code_1.shape[0]
Example #14
def data_from_prediction(data, G, obj_dic):
    agent_dic = dict()
    for i in data.true_time.unique():
        agent_dic[i] = list(data.predict_loc[data.true_time == i])
    data_2 = data.copy()
    min_time = min(data.time)
    for idx in data_2.index:
        if data_2.loc[idx, 'time'] > min_time:
            #move states down one position
            data_2.at[idx, 'sample1'] = data.loc[idx - 1, 'query_state'].copy()
            data_2.at[idx, 'sample2'] = data.loc[idx - 1, 'sample1'].copy()
            data_2.at[idx, 'sample3'] = data.loc[idx - 1, 'sample2'].copy()
            data_2.at[idx, 'sample4'] = data.loc[idx - 1, 'sample3'].copy()
            data_2.at[idx, 'sample5'] = data.loc[idx - 1, 'sample4'].copy()
            #replace query state by predicted state
            data_2.at[idx, 'query_state'] = new_state(
                G, data.loc[idx - 1, 'predict_loc'], obj_dic, agent_dic,
                data.loc[idx, 'true_time'])
            direction_vec = [0, 0, 0, 0, 0]
            direction_vec[int(data.loc[idx - 1, 'prediction'])] = 1
            data_2.at[idx, 'sample1'][-1] = direction_vec
            data_2.at[idx, 'agent_loc'] = data.at[idx - 1, 'predict_loc']
    data_2 = data_2.drop(list(data_2[data_2.time == min_time].index))
    return data_2
Example #15
def redandclean():
	data = pd.read_csv("cleanedata/cleandata.csv")
	
	train=data.copy()

	print('Training Features shape: ', train.shape)

	(train['DAYS_BIRTH']/-365).describe()

	thousand_anomalies = train[(train['DAYS_EMPLOYED']/365>=900) & (train['DAYS_EMPLOYED']/365<=1100)]
	
	# Most anomalies were able to repay on time. But how do they compare with the non-anomalies?
	# get the index of anomalies and non anomalies
	anomalies_index = pd.Index(thousand_anomalies.index)
	non_anomalies_index = train.index.difference(anomalies_index)
	# get the anomalies records
	non_anomalies = train.iloc[non_anomalies_index]
	# get the anomaly targets
	anomalies_target = thousand_anomalies['TARGET'].value_counts()
	non_anomalies_target = non_anomalies['TARGET'].value_counts()
	# find the default rate for anomalies and non anomalies

	print("Anomalies have a default rate of {}%".format(100*anomalies_target[1]/(anomalies_target[1]+anomalies_target[0])))

	# Create an anomalous flag column
	train['DAYS_EMPLOYED_ANOM'] = train["DAYS_EMPLOYED"] == 365243

	# Replace the anomalous values with nan
	train['DAYS_EMPLOYED'] = train['DAYS_EMPLOYED'].replace({365243: np.nan})
	# Looking at the years employed for anomalies
	from sklearn.impute import SimpleImputer  # Imputer was removed from sklearn; SimpleImputer is its replacement
	# poly_fitting_vars = ['EXT_SOURCE_3', 'EXT_SOURCE_2', 'EXT_SOURCE_1','DAYS_BIRTH']
	imputer = SimpleImputer(missing_values=np.nan, strategy='median')
	# train[poly_fitting_vars] = imputer.fit_transform(train[poly_fitting_vars])
	# train[poly_fitting_vars].shape

	# from sklearn.preprocessing import PolynomialFeatures
	# poly_feat = PolynomialFeatures(degree=4)
	# poly_interaction_train = poly_feat.fit_transform(train[poly_fitting_vars])

	# train['DIR'] = train['AMT_CREDIT']/train['AMT_INCOME_TOTAL']
	# train['AIR'] = train['AMT_ANNUITY']/train['AMT_INCOME_TOTAL']
	# train['ACR'] = train['AMT_ANNUITY']/train['AMT_CREDIT']
	# train['DAR'] = train['DAYS_EMPLOYED']/train['DAYS_BIRTH']
	
	sensetiveFeatures=['CODE_GENDER', 'NAME_INCOME_TYPE','NAME_FAMILY_STATUS','OCCUPATION_TYPE','ORGANIZATION_TYPE']
	X_num=train.copy()

	X_num=X_num.drop(columns=sensetiveFeatures)
	target = X_num['TARGET']
	X_num=X_num.drop(columns=['TARGET','Unnamed: 0'])
	X_num = pd.get_dummies(X_num)
	X_num = imputer.fit_transform(X_num)
	SenstiveData=train[sensetiveFeatures].copy()

	a=train.groupby(sensetiveFeatures).count()
	# print(a['Unnamed: 0'],np.max(a['Unnamed: 0']),np.min(a['Unnamed: 0']), np.mean(a['Unnamed: 0']),np.std(a['Unnamed: 0']) )
	#5262 1 24.483358459932738 136.4728162223554
	
	for i in range(1):	
		SensVector=Mytransformer(sensetiveFeatures[i], SenstiveData)
		model=leaningClassifirofSensetive(X_num, SensVector,nEpoches=20)
Example #16
    def __init__(self,
                 root,
                 classes,
                 memory_classes,
                 memory,
                 task_num,
                 train,
                 transform=None,
                 target_transform=None,
                 download=True):

        super(iMNIST, self).__init__(root,
                                     task_num,
                                     transform=transform,
                                     target_transform=target_transform,
                                     download=download)

        self.train = train  # training set or test set
        self.root = root
        self.target_transform = target_transform
        self.transform = transform
        if download:
            self.download()

        if not self._check_exists():
            raise RuntimeError('Dataset not found.' +
                               ' You can use download=True to download it')

        if self.train:
            data_file = self.training_file
        else:
            data_file = self.test_file

        self.data, self.targets = torch.load(
            os.path.join(self.processed_folder, data_file))
        self.data = np.array(self.data).astype(np.float32)
        self.targets = list(np.array(self.targets))

        self.train = train  # training set or test set
        if not isinstance(classes, list):
            classes = [classes]

        self.class_mapping = {c: i for i, c in enumerate(classes)}
        self.class_indices = {}

        for cls in classes:
            self.class_indices[self.class_mapping[cls]] = []

        data = []
        targets = []
        tt = []  # task module labels
        td = []  # discriminator labels

        for i in range(len(self.data)):
            if self.targets[i] in classes:
                data.append(self.data[i])
                targets.append(self.class_mapping[self.targets[i]])
                tt.append(task_num)
                td.append(task_num + 1)
                self.class_indices[self.class_mapping[self.targets[i]]].append(
                    i)

        if self.train:
            if memory_classes:
                for task_id in range(task_num):
                    for i in range(len(memory[task_id]['x'])):
                        if memory[task_id]['y'][i] in range(
                                len(memory_classes[task_id])):
                            data.append(memory[task_id]['x'][i])
                            targets.append(memory[task_id]['y'][i])
                            tt.append(memory[task_id]['tt'][i])
                            td.append(memory[task_id]['td'][i])

        self.data = data.copy()
        self.targets = targets.copy()
        self.tt = tt.copy()
        self.td = td.copy()
Example #17
def main():
    global i
    global epoch
    global loss_sum
    global running
    parser = ArgumentParser()

    # Either define those arguments individually or choose one of the profiles given further down in the code

    parser.add_argument("-model",
                        type=str,
                        default="dnc",
                        help="Network Model")
    # Training Details
    parser.add_argument("-task",
                        type=str,
                        default="babi",
                        help="Task to learn")
    parser.add_argument(
        "-n_subbatch",
        type=str,
        default="auto",
        help="Average this much forward passes to a backward pass")
    parser.add_argument("-max_input_count_per_batch",
                        type=int,
                        default=6000,
                        help="Max batch_size*len that can fit into memory")
    parser.add_argument("-test_interval",
                        type=int,
                        default=500,
                        help="Run test in this interval")
    parser.add_argument("-lr",
                        type=float,
                        default=0.0001,
                        help="Learning rate")
    parser.add_argument("-lr_scheduler",
                        type=str,
                        default="none",
                        help="Define Learning Rate Scheduler")
    parser.add_argument("-lr_step",
                        type=int,
                        default=10,
                        help="Epochs before lr scheduler does a step")
    parser.add_argument("-cyc_base",
                        type=float,
                        default=0.0001,
                        help="Base LR for Cyclic LR")
    parser.add_argument("-cyc_max",
                        type=float,
                        default=0.005,
                        help="Max LR for Cyclic LR")
    parser.add_argument("-wd", type=float, default=1e-5, help="Weight decay")
    parser.add_argument("-optimizer",
                        type=str,
                        default="rmsprop",
                        help="Optimizer algorithm")
    parser.add_argument("-momentum",
                        type=float,
                        default=0.9,
                        help="Momentum for optimizer")
    parser.add_argument("-preview_interval",
                        type=int,
                        default=10,
                        help="Show preview every nth iteration")
    parser.add_argument("-info_interval",
                        type=int,
                        default=10,
                        help="Show info every nth iteration")
    parser.add_argument("-gpu",
                        default="auto",
                        type=str,
                        help="Run on this GPU.")
    parser.add_argument("-test_on_start", default="0", save=False)
    parser.add_argument("-test_batch_size", default=16)
    parser.add_argument("-grad_clip",
                        type=float,
                        default=10.0,
                        help="Max gradient norm")
    parser.add_argument("-clip_controller",
                        type=float,
                        default=20.0,
                        help="Max gradient norm")
    # Architectural/Structural Details
    parser.add_argument("-mem_count",
                        type=int,
                        default=16,
                        help="Number of memory cells")
    parser.add_argument("-data_word_size",
                        type=int,
                        default=128,
                        help="Memory word size")
    parser.add_argument("-n_read_heads",
                        type=int,
                        default=1,
                        help="Number of read heads")
    parser.add_argument("-controller_type",
                        type=str,
                        default="lstm",
                        help="Controller type: lstm or linear")
    parser.add_argument(
        "-layer_sizes",
        type=str,
        default="256",
        help="Controller layer sizes. Separate with ,. For example 512,256,256",
        parser=lambda x: [int(y) for y in x.split(",") if y])
    parser.add_argument(
        "-lstm_use_all_outputs",
        type=bool,
        default=1,
        help=
        "Use all LSTM outputs as controller output vs use only the last layer")
    # Csordas / Schmidhuber improvements
    parser.add_argument(
        "-dealloc_content",
        type=bool,
        default=1,
        help=
        "Deallocate memory content, unlike DNC, which leaves it unchanged, just decreases the usage counter, causing problems with lookup"
    )
    parser.add_argument(
        "-sharpness_control",
        type=bool,
        default=1,
        help="Distribution sharpness control for forward and backward links")
    # Logs, Savefiles, Debug
    parser.add_argument("-debug",
                        type=bool,
                        default=1,
                        help="Enable debugging")
    parser.add_argument("-debug_log",
                        type=bool,
                        default=0,
                        help="Enable debug log")
    parser.add_argument("-name",
                        type=str,
                        help="Save training to this directory")
    parser.add_argument("-save_interval",
                        type=int,
                        default=500,
                        help="Save network every nth iteration")
    parser.add_argument("-masked_lookup",
                        type=bool,
                        default=1,
                        help="Enable masking in content lookups")
    parser.add_argument("-mask_min", default=0.0)
    parser.add_argument(
        "-visport",
        type=int,
        default=-1,
        help="Port to run Visdom server on. -1 to disable")  # Visualisation
    parser.add_argument("-dump_profile", type=str, save=False)
    parser.add_argument("-dump_heatmaps", default=False, save=False)
    parser.add_argument("-noargsave",
                        type=bool,
                        default=False,
                        help="Do not save modified arguments",
                        save=False)
    parser.add_argument("-demo",
                        type=bool,
                        default=False,
                        help="Do a single step with fixed seed",
                        save=False)
    parser.add_argument(
        "-exit_after",
        type=int,
        help="Exit after this amount of steps. Useful for debugging.",
        save=False)
    # NLP Tasks, BaBi
    parser.add_argument(
        "-run_on_fraction",
        type=int,
        default=0,
        help="If >1, only 1/this part of the datasets will be used")
    parser.add_argument("-embedding_size",
                        type=int,
                        default=256,
                        help="Size of word embedding for NLP tasks")
    parser.add_argument("-dataset_path",
                        type=str,
                        default="/storage/remote/atcremers45/s0238/",
                        parser=ArgumentParser.str_or_none(),
                        help="Specify babi path manually")
    parser.add_argument("-babi_train_tasks",
                        type=str,
                        default="none",
                        parser=ArgumentParser.list_or_none(type=str),
                        help="babi task list to use for training")
    parser.add_argument("-babi_test_tasks",
                        type=str,
                        default="none",
                        parser=ArgumentParser.list_or_none(type=str),
                        help="babi task list to use for testing")
    parser.add_argument("-babi_train_sets",
                        type=str,
                        default="train",
                        parser=ArgumentParser.list_or_none(type=str),
                        help="babi train sets to use")
    parser.add_argument("-babi_test_sets",
                        type=str,
                        default="test",
                        parser=ArgumentParser.list_or_none(type=str),
                        help="babi test sets to use")
    parser.add_argument(
        "-think_steps",
        type=int,
        default=0,
        help="Iddle steps before requiring the answer (for bAbi)")
    parser.add_argument("-load", type=str,
                        save=False)  # TODO: What does this do?
    parser.add_argument("-print_test", default=False, save=False)
    # Copy Task
    parser.add_argument("-bit_w",
                        type=int,
                        default=8,
                        help="Bit vector length for copy task")
    parser.add_argument("-block_w",
                        type=int,
                        default=3,
                        help="Block width to associative recall task")
    parser.add_argument("-len",
                        type=str,
                        default="4",
                        help="Sequence length for copy task",
                        parser=lambda x: [int(a) for a in x.split("-")])
    parser.add_argument("-repeat",
                        type=str,
                        default="1",
                        help="Sequence length for copy task",
                        parser=lambda x: [int(a) for a in x.split("-")])
    parser.add_argument("-batch_size",
                        type=int,
                        default=16,
                        help="Sequence length for copy task")

    parser.add_profile([
        ArgumentParser.Profile("babi", {
            "preview_interval": 10,
            "save_interval": 500,
            "task": "babi",
            "mem_count": 256,
            "data_word_size": 64,
            "n_read_heads": 4,
            "layer_sizes": "256",
            "controller_type": "lstm",
            "lstm_use_all_outputs": True,
            "momentum": 0.9,
            "embedding_size": 128,
            "test_interval": 5000,
            "think_steps": 3,
            "batch_size": 2
        },
                               include=["dnc-msd"]),
        ArgumentParser.Profile(
            "repeat_copy", {
                "bit_w": 8,
                "repeat": "1-8",
                "len": "2-14",
                "task": "copy",
                "think_steps": 1,
                "preview_interval": 10,
                "info_interval": 10,
                "save_interval": 100,
                "data_word_size": 16,
                "layer_sizes": "32",
                "n_subbatch": 1,
                "controller_type": "lstm",
            }),
        ArgumentParser.Profile("repeat_copy_simple", {
            "repeat": "1-3",
        },
                               include="repeat_copy"),
        ArgumentParser.Profile(
            "dnc", {
                "masked_lookup": False,
                "sharpness_control": False,
                "dealloc_content": False
            }),
        ArgumentParser.Profile(
            "dnc-m", {
                "masked_lookup": True,
                "sharpness_control": False,
                "dealloc_content": False
            }),
        ArgumentParser.Profile(
            "dnc-s", {
                "masked_lookup": False,
                "sharpness_control": True,
                "dealloc_content": False
            }),
        ArgumentParser.Profile(
            "dnc-d", {
                "masked_lookup": False,
                "sharpness_control": False,
                "dealloc_content": True
            }),
        ArgumentParser.Profile(
            "dnc-md", {
                "masked_lookup": True,
                "sharpness_control": False,
                "dealloc_content": True
            }),
        ArgumentParser.Profile(
            "dnc-ms", {
                "masked_lookup": True,
                "sharpness_control": True,
                "dealloc_content": False
            }),
        ArgumentParser.Profile(
            "dnc-sd", {
                "masked_lookup": False,
                "sharpness_control": True,
                "dealloc_content": True
            }),
        ArgumentParser.Profile(
            "dnc-msd", {
                "masked_lookup": True,
                "sharpness_control": True,
                "dealloc_content": True
            }),
        ArgumentParser.Profile(
            "keyvalue", {
                "repeat": "1",
                "len": "2-16",
                "mem_count": 16,
                "task": "keyvalue",
                "think_steps": 1,
                "preview_interval": 10,
                "info_interval": 10,
                "data_word_size": 32,
                "bit_w": 12,
                "save_interval": 1000,
                "layer_sizes": "32"
            }),
        ArgumentParser.Profile("keyvalue2way", {
            "task": "keyvalue2way",
        },
                               include="keyvalue"),
        ArgumentParser.Profile(
            "associative_recall", {
                "task": "recall",
                "bit_w": 8,
                "len": "2-16",
                "mem_count": 64,
                "data_word_size": 32,
                "n_read_heads": 1,
                "layer_sizes": "128",
                "controller_type": "lstm",
                "lstm_use_all_outputs": 1,
                "think_steps": 1,
                "mask_min": 0.1,
                "info_interval": 10,
                "save_interval": 1000,
                "preview_interval": 10,
                "n_subbatch": 1,
            })
    ])

    opt = parser.parse()
    assert opt.name is not None, "Training dir (-name parameter) not given"
    opt = parser.sync(os.path.join(opt.name, "args.json"),
                      save=not opt.noargsave)

    if opt.demo:
        Seed.fix()

    os.makedirs(os.path.join(opt.name, "save"), exist_ok=True)
    os.makedirs(os.path.join(opt.name, "preview"), exist_ok=True)

    gpu_allocator.use_gpu(opt.gpu)

    debug.enableDebug = opt.debug_log

    if opt.visport > 0:
        Visdom.start(opt.visport)

    class LengthHackSampler:
        """
        I don't know exactly what it is needed for, but an object of this class can return a generator object that,
        when iterated over, always yields a list with n elements off the same value, m, where n=batch_size and
        m=length.
        Only used in BitMapTaskRepeater task
        """
        def __init__(self, batch_size, length):
            self.length = length
            self.batch_size = batch_size

        def __iter__(self):
            while True:
                len = self.length() if callable(self.length) else self.length
                yield [len] * self.batch_size

        def __len__(self):
            return 0x7FFFFFFF

    embedding = None
    test_set = None
    curriculum = None
    loader_reset = False

    # Check the task and initialize dataset and metaparameters
    if opt.task == "copy":
        dataset = CopyData(bit_w=opt.bit_w)
        in_size = opt.bit_w + 1
        out_size = in_size
    elif opt.task == "recall":
        dataset = AssociativeRecall(bit_w=opt.bit_w, block_w=opt.block_w)
        in_size = opt.bit_w + 2
        out_size = in_size
    elif opt.task == "keyvalue":
        assert opt.bit_w % 2 == 0, "Key-value dataset works only with even bit_w"
        dataset = KeyValue(bit_w=opt.bit_w)
        in_size = opt.bit_w + 1
        out_size = opt.bit_w // 2
    elif opt.task == "keyvalue2way":
        assert opt.bit_w % 2 == 0, "Key-value dataset works only with even bit_w"
        dataset = KeyValue2Way(bit_w=opt.bit_w)
        in_size = opt.bit_w + 2
        out_size = opt.bit_w // 2
    elif opt.task == "babi":
        dataset = bAbiDataset(think_steps=opt.think_steps,
                              dir_name=opt.dataset_path,
                              name="Train")
        test_set = bAbiDataset(think_steps=opt.think_steps,
                               dir_name=opt.dataset_path,
                               name="Validation")
        dataset.use(opt.babi_train_tasks, opt.babi_train_sets)
        in_size = opt.embedding_size
        print("bAbi: loaded total of %d sequences." % len(dataset))
        test_set.use(opt.babi_test_tasks, opt.babi_test_sets)
        out_size = len(dataset.vocabulary)
        print("bAbi: using %d sequences for training, %d for testing" %
              (len(dataset), len(test_set)))
    elif opt.task in ["ptb", "PTB"]:
        dataset = PTB('test', seq_len=15)
        test_set = PTB('validation', seq_len=15)
        in_size = opt.embedding_size
        print("Loaded dateset with {d} and test set with {t} elements".format(
            d=len(dataset), t=len(test_set)))
        out_size = len(dataset.vocabulary)
        print("PTB: using a total vocabulary of {} words".format(out_size))
    else:
        assert False, "Invalid task: %s" % opt.task

    if opt.task in ["babi"]:
        print("Babi Batchsize: ", opt.batch_size, "Test Batchsize: ",
              opt.test_batch_size)
        data_loader = torch.utils.data.DataLoader(dataset,
                                                  batch_size=opt.batch_size,
                                                  num_workers=4,
                                                  pin_memory=True,
                                                  shuffle=True,
                                                  collate_fn=MetaCollate())
        test_loader = torch.utils.data.DataLoader(
            test_set,
            batch_size=opt.test_batch_size,
            num_workers=opt.test_batch_size,
            pin_memory=True,
            shuffle=False,
            collate_fn=MetaCollate() if test_set is not None else None)
    elif opt.task in ["ptb", 'PTB']:
        if opt.run_on_fraction > 1:
            sampler = torch.utils.data.SequentialSampler(
                list(range(0, len(dataset), opt.run_on_fraction)))
            data_loader = torch.utils.data.DataLoader(
                dataset,
                batch_size=opt.batch_size,
                sampler=sampler,
                collate_fn=MetaCollate())
            test_loader = torch.utils.data.DataLoader(
                test_set,
                batch_size=opt.test_batch_size,
                sampler=sampler,
                collate_fn=MetaCollate())
        else:
            data_loader = torch.utils.data.DataLoader(
                dataset,
                batch_size=opt.batch_size,
                shuffle=True,
                collate_fn=MetaCollate())
            test_loader = torch.utils.data.DataLoader(
                test_set,
                batch_size=opt.test_batch_size,
                shuffle=False,
                collate_fn=MetaCollate())
    else:
        dataset = BitmapTaskRepeater(dataset)
        lhs = LengthHackSampler(
            opt.batch_size,
            BitmapTaskRepeater.key_sampler(opt.len, opt.repeat))
        data_loader = torch.utils.data.DataLoader(dataset,
                                                  batch_sampler=lhs,
                                                  num_workers=1,
                                                  pin_memory=True)

    # Setting up the controller for the DNC
    if opt.controller_type == "lstm":
        controller_constructor = functools.partial(
            LSTMController, out_from_all_layers=opt.lstm_use_all_outputs)
    elif opt.controller_type == "linear":
        controller_constructor = FeedforwardController
    else:
        assert False, "Invalid controller: %s" % opt.controller_type

    device = torch.device('cuda') if opt.gpu != "none" else torch.device("cpu")
    print("DEVICE: ", device)
    print("Current model: ", opt.model)

    if opt.model.lower() == 'dnc':
        model = DNC(in_size,
                    out_size,
                    opt.data_word_size,
                    opt.mem_count,
                    opt.n_read_heads,
                    controller_constructor(opt.layer_sizes),
                    batch_first=True,
                    mask=opt.masked_lookup,
                    dealloc_content=opt.dealloc_content,
                    link_sharpness_control=opt.sharpness_control,
                    mask_min=opt.mask_min,
                    clip_controller=opt.clip_controller)
    elif opt.model.lower() == 'lstm':
        model = NLP_LSTM(out_size, in_size, sentence_length=10, device=device)
    elif opt.model.lower() == 'entnet':
        print(opt.task)
        model = RecurrentEntityNetwork(vocabulary_size=out_size,
                                       embedding_dim=in_size,
                                       sentence_lenght=10,
                                       device=device,
                                       task=opt.task)
    else:
        raise ValueError("Invalid model: {}".format(opt.model))

    params = [{
        'params':
        [p for n, p in model.named_parameters() if not n.endswith(".bias")]
    }, {
        'params':
        [p for n, p in model.named_parameters() if n.endswith(".bias")],
        'weight_decay':
        0
    }]

    if isinstance(dataset, NLPTask):
        embedding = torch.nn.Embedding(len(dataset.vocabulary),
                                       in_size).to(device)
        params.append({'params': embedding.parameters(), 'weight_decay': 0})

    if opt.optimizer == "sgd":
        optimizer = torch.optim.SGD(params,
                                    lr=opt.lr,
                                    weight_decay=opt.wd,
                                    momentum=opt.momentum)
    elif opt.optimizer == "adam":
        optimizer = torch.optim.Adam(params, lr=opt.lr, weight_decay=opt.wd)
    elif opt.optimizer == "rmsprop":
        optimizer = torch.optim.RMSprop(params,
                                        lr=opt.lr,
                                        weight_decay=opt.wd,
                                        momentum=opt.momentum,
                                        eps=1e-10)
    else:
        assert "Invalid optimizer: %s" % opt.optimizer

    lr_scheduler = None
    if opt.lr_scheduler == 'step':
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                       opt.lr_step,
                                                       gamma=0.5)
    elif opt.lr_scheduler == 'cyclic':
        lr_scheduler = torch.optim.lr_scheduler.CyclicLR(
            optimizer, opt.cyc_base, opt.cyc_max, opt.lr_step)

    n_params = sum([sum([t.numel() for t in d['params']]) for d in params])

    model = model.to(device)
    if embedding is not None and hasattr(embedding, "to"):
        embedding = embedding.to(device)

    i = 0
    epoch = 0
    loss_sum = 0

    Visdom.Text("Information").set(
        '<b>Name:</b> {n}<br><b>Batchsize:</b> {b}<br><b>Train Task:</b> {tt}, {tdp} data points <br>'
        '<b>Validation Task:</b> {vt}, {vdp} data points<br><b>Running on:</b> {device}<br>'
        '<b>Parameters:</b> {np}<br><b>Model:</b> {m}<br><b>Optimizer:</b> {opt}<br>'
        '<b>Initial LR:</b> {ilr}<br><b>Weight Decay:</b> {wd}<br>'
        '<b>Learning rate scheduler:</b> {lrs}<br><b>Start Time:</b> {dt}'.
        format(n=opt.name,
               b=opt.batch_size,
               tt=opt.babi_train_tasks or opt.task,
               vt=opt.babi_test_tasks or opt.task,
               tdp=len(dataset),
               vdp=len(test_set),
               device=device,
               np=n_params,
               m=opt.model,
               opt=opt.optimizer,
               ilr=opt.lr,
               wd=opt.wd,
               lrs=opt.lr_scheduler,
               dt=datetime.now().strftime("%a %d/%m/%Y, %H:%M")))

    loss_plot = Visdom.Plot2D("Train Loss",
                              store_interval=opt.info_interval,
                              xlabel="iterations",
                              ylabel="loss")
    test_loss_plot = Visdom.Plot2D("Validation Loss",
                                   store_interval=1,
                                   xlabel="Epoch",
                                   ylabel="Loss")
    ppl_plot = Visdom.Plot2D("Perplexity on Validation Data",
                             store_interval=1,
                             xlabel="Epoch",
                             ylabel="Perplexity")
    lr_plot = Visdom.Plot2D("Learning Rate",
                            store_interval=1,
                            xlabel="epochs",
                            ylabel="Learning Rate")

    if curriculum is not None:
        curriculum_plot = Visdom.Plot2D(
            "curriculum lesson" + (" (last %d)" % (curriculum.n_lessons - 1) if
                                   curriculum.n_lessons is not None else ""),
            xlabel="iterations",
            ylabel="lesson")
        curriculum_accuracy = Visdom.Plot2D("curriculum accuracy",
                                            xlabel="iterations",
                                            ylabel="accuracy")

    saver = Saver(os.path.join(opt.name, "save"),
                  short_interval=opt.save_interval)
    saver.register("model", StateSaver(model))
    saver.register("optimizer", StateSaver(optimizer))
    saver.register("i", GlobalVarSaver("i"))
    saver.register("epoch", GlobalVarSaver("epoch"))
    saver.register("loss_sum", GlobalVarSaver("loss_sum"))
    saver.register("loss_plot", StateSaver(loss_plot))
    saver.register("lr_plot", StateSaver(lr_plot))
    saver.register("train_loss_plot", StateSaver(test_loss_plot))
    saver.register("ppl_plot", StateSaver(ppl_plot))
    saver.register("dataset", StateSaver(dataset))
    if lr_scheduler:
        saver.register("lr_scheduler", StateSaver(lr_scheduler))
    if test_set:
        pass
        # saver.register("test_set", StateSaver(test_set))

    if curriculum is not None:
        saver.register("curriculum", StateSaver(curriculum))
        saver.register("curriculum_plot", StateSaver(curriculum_plot))
        saver.register("curriculum_accuracy", StateSaver(curriculum_accuracy))

    if isinstance(dataset, NLPTask):
        saver.register("word_embeddings", StateSaver(embedding))
    elif embedding is not None:
        saver.register("embeddings", StateSaver(embedding))

    if not saver.load(opt.load):
        model.reset_parameters()
        if embedding is not None:
            embedding.reset_parameters()

    visualizers = {}

    debug_schemas = {
        "read_head": {
            "list_dim": 2
        },
        "temporal_links/forward_dists": {
            "list_dim": 2
        },
        "temporal_links/backward_dists": {
            "list_dim": 2
        }
    }

    def plot_debug(debug, prefix="", schema={}):
        if debug is None:
            return

        for k, v in debug.items():
            curr_name = prefix + k
            if curr_name in debug_schemas:
                curr_schema = schema.copy()
                curr_schema.update(debug_schemas[curr_name])
            else:
                curr_schema = schema

            if isinstance(v, dict):
                plot_debug(v, curr_name + "/", curr_schema)
                continue

            data = v[0]

            if curr_schema.get("list_dim", -1) > 0:
                if data.ndim != 3:
                    print(
                        "WARNING: unknown data shape for array display: %s, tensor %s"
                        % (data.shape, curr_name))
                    continue

                n_steps = data.shape[curr_schema["list_dim"] - 1]
                if curr_name not in visualizers:
                    visualizers[curr_name] = [
                        Visdom.Heatmap(
                            curr_name + "_%d" % i,
                            dumpdir=os.path.join(opt.name, "preview")
                            if opt.dump_heatmaps else None)
                        for i in range(n_steps)
                    ]

                for i in range(n_steps):
                    visualizers[curr_name][i].draw(
                        index_by_dim(data, curr_schema["list_dim"] - 1, i))
            else:
                if data.ndim != 2:
                    print(
                        "WARNING: unknown data shape for simple display: %s, tensor %s"
                        % (data.shape, curr_name))
                    continue

                if curr_name not in visualizers:
                    visualizers[curr_name] = Visdom.Heatmap(
                        curr_name,
                        dumpdir=os.path.join(opt.name, "preview")
                        if opt.dump_heatmaps else None)

                visualizers[curr_name].draw(data)

    def run_model(input, debug=None):
        if isinstance(dataset, NLPTask):
            input = input["input"]
        else:
            input = input["input"] * 2.0 - 1.0
        full = opt.task not in ['PTB', 'ptb']
        return model(input, embed=embedding, full=full)  # debug=debug

    def multiply_grads(params, mul):
        if mul == 1:
            return

        for pa in params:
            for p in pa["params"]:
                p.grad.data *= mul

    def test():
        if test_set is None:
            return

        start_time = time.time()
        t = test_set.start_test()

        test_loss = []

        with torch.no_grad():
            for data in tqdm(test_loader):
                data = {
                    k: v.to(device) if torch.is_tensor(v) else v
                    for k, v in data.items()
                }
                if hasattr(dataset, "prepare"):
                    data = dataset.prepare(data)

                net_out = run_model(data)
                test_set.verify_result(t, data, net_out)

                test_loss.append(
                    dataset.loss(net_out, data["output"]).data.item())
            avg_loss = sum(test_loss) / len(test_loss)
            perplexity = math.exp(avg_loss)
            test_loss_plot.add_point(epoch, avg_loss)
            if epoch > 5:  # Perplexity is immensely high in the beginning
                ppl_plot.add_point(epoch, perplexity)

        test_set.show_test_results(epoch, t)
        print("Test done in %gs" % (time.time() - start_time))

    # def test_on_train(train_data):
    #     with torch.no_grad():
    #         net_out = run_model(train_data)

    print("Test interval: ", opt.test_interval)
    if opt.test_on_start.lower() in ["on", "1", "true", "quit"]:
        test()
        if opt.test_on_start.lower() == "quit":
            saver.write(i)
            sys.exit(-1)

    if opt.print_test:
        model.eval()
        total = 0
        correct = 0
        with torch.no_grad():
            for data in tqdm(test_loader):
                if not running:
                    return

                data = {
                    k: v.to(device) if torch.is_tensor(v) else v
                    for k, v in data.items()
                }
                if hasattr(test_set, "prepare"):
                    data = test_set.prepare(data)

                net_out = run_model(data)

                c, t = test_set.curriculum_measure(net_out, data["output"])
                total += t
                correct += c

        print("Test result: %2.f%% (%d out of %d correct)" %
              (100.0 * correct / total, correct, total))
        model.train()
        return

    iter_start_time = time.time() if i % opt.info_interval == 0 else None
    data_load_total_time = 0

    start_i = i

    if opt.dump_profile:
        profiler = torch.autograd.profiler.profile(use_cuda=True)

    if opt.dump_heatmaps:
        dataset.set_dump_dir(os.path.join(opt.name, "preview"))

    @preview()
    def do_visualize(raw_data, output, pos_map, debug):
        if pos_map is not None:
            output = embedding.backmap_output(output, pos_map,
                                              raw_data["output"].shape[1])
        dataset.visualize_preview(raw_data, output)

        if debug is not None:
            plot_debug(debug)

    preview_timer = OnceEvery(opt.preview_interval)

    pos_map = None
    start_iter = i

    if curriculum is not None:
        curriculum.init()
    """
    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    !                                       !
    !!                                     !!
    !!!                                   !!!
    !!!! Actual Running Mode starts here !!!!
    !!!                                   !!!
    !!                                     !!
    !                                       !
    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    """
    while running:
        data_load_timer = time.time()
        epoch += 1
        avg_acc = {}
        print("Epoch {e}".format(e=epoch))
        for data in data_loader:

            if not running:
                break

            if loader_reset:
                print("Loader reset requested. Resetting...")
                loader_reset = False
                if curriculum is not None:
                    curriculum.lesson_started()
                break

            if opt.dump_profile:
                if i == start_i + 1:
                    print("Starting profiler")
                    profiler.__enter__()
                elif i == start_i + 5 + 1:
                    print("Stopping profiler")
                    profiler.__exit__(None, None, None)
                    print("Average stats")
                    print(profiler.key_averages().table("cpu_time_total"))
                    print("Writing trace to file")
                    profiler.export_chrome_trace(opt.dump_profile)
                    print("Done.")
                    sys.exit(0)
                else:
                    print("Step %d out of 5" % (i - start_i))

            debug.dbg_print("-------------------------------------")
            raw_data = data

            data = {
                k: v.to(device) if torch.is_tensor(v) else v
                for k, v in data.items()
            }  # Move every tensor in the batch to the target device
            if hasattr(dataset, "prepare"):
                data = dataset.prepare(data)

            data_load_total_time += time.time() - data_load_timer

            need_preview = preview_timer()
            debug_data = {} if opt.debug and need_preview else None

            optimizer.zero_grad()

            if opt.n_subbatch == "auto":
                n_subbatch = math.ceil(data["input"].numel() /
                                       opt.max_input_count_per_batch)
            else:
                n_subbatch = int(opt.n_subbatch)

            real_batch = max(math.floor(opt.batch_size / n_subbatch), 1)
            n_subbatch = math.ceil(opt.batch_size / real_batch)
            remaining_batch = opt.batch_size % real_batch

            for subbatch in range(n_subbatch):
                if not running:
                    break
                input = data["input"]
                target = data["output"]
                # print(input.shape, target.shape)
                if n_subbatch != 1 and (subbatch * real_batch <
                                        input.shape[0]):
                    # print("from to: ", subbatch*real_batch, (subbatch+1)*real_batch)
                    input = input[subbatch * real_batch:(subbatch + 1) *
                                  real_batch]
                    target = target[subbatch * real_batch:(subbatch + 1) *
                                    real_batch]
                # print(input.shape, target.shape)
                f2 = data.copy()
                f2["input"] = input
                output = run_model(
                    f2
                )  #  debug=debug_data if subbatch == n_subbatch - 1 else None
                # in shape: batch_size x longest_sequence_length
                # out shape: batch_size x longest_sequence_length x vocabulary length
                l = dataset.loss(output, target)
                # print("remaining", remaining_batch)
                try:
                    debug.nan_check(l, force=True)
                except SystemExit:
                    print("in and out : ", input.shape, input, output.shape,
                          output)
                    print("subbatch in nsub realbatch", subbatch, n_subbatch,
                          real_batch)
                    print("f2", f2)
                    print("data", data)
                    print("expected out and in 2: ", f2['output'].shape,
                          f2['input'].shape)
                    print("expected out and in 1: ", data['output'].shape,
                          data['input'].shape)
                    print("remaining batch", remaining_batch)
                    print("NaN check not passed")
                    sys.exit(1)
                l.backward()

                if curriculum is not None:
                    curriculum.update(
                        *dataset.curriculum_measure(output, target))

                if remaining_batch != 0 and subbatch == n_subbatch - 2:
                    multiply_grads(params, real_batch / remaining_batch)

            if n_subbatch != 1:
                if remaining_batch == 0:
                    multiply_grads(params, 1 / n_subbatch)
                else:
                    multiply_grads(params, remaining_batch / opt.batch_size)

            for p in params:
                try:
                    torch.nn.utils.clip_grad_norm_(p["params"], opt.grad_clip)
                except RuntimeError:
                    pass  # lstm cannot handle this right now

            optimizer.step()

            i += 1

            curr_loss = l.data.item()
            loss_plot.add_point(i, curr_loss)

            loss_sum += curr_loss

            if i % opt.info_interval == 0:
                tim = time.time()
                loss_avg = loss_sum / opt.info_interval

                if curriculum is not None:
                    curriculum_accuracy.add_point(i, curriculum.get_accuracy())
                    curriculum_plot.add_point(i, curriculum.step)

                message = "Iteration %d, loss: %.4f" % (i, loss_avg)
                if iter_start_time is not None:
                    message += " (%.2f ms/iter, load time %.2g ms/iter, visport: %s)" % (
                        (tim - iter_start_time) / opt.info_interval * 1000.0,
                        data_load_total_time / opt.info_interval * 1000.0,
                        Visdom.port)
                print(message)
                iter_start_time = tim
                loss_sum = 0
                data_load_total_time = 0

            debug.dbg_print("Iteration %d, loss %g" % (i, curr_loss))

            if need_preview:
                do_visualize(raw_data, output, pos_map, debug_data)

            dataset.verify_result(avg_acc, data, output)

            debug_tick = saver.tick(i)

            if opt.demo and opt.exit_after is None:
                running = False
                input("Press enter to quit.")

            if opt.exit_after is not None and (i -
                                               start_iter) >= opt.exit_after:
                running = False

            data_load_timer = time.time()

        if running:  # Once every epoch
            test()

        for param_g in optimizer.param_groups:
            lr_plot.add_point(epoch, param_g['lr'])
            break

        dataset.show_test_results(epoch, avg_acc, x_label='Epoch')

        if lr_scheduler:
            lr_scheduler.step()
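
The sub-batch loop above accumulates gradients over several smaller forward/backward passes and then rescales them with multiply_grads so that the update matches one full-batch step. A minimal, self-contained sketch of that accumulate-and-rescale pattern (toy model and data, not the DNC model above):

import torch

# toy stand-ins for the model, optimizer and batch used above
model = torch.nn.Linear(8, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
batch_size, max_per_pass = 16, 6

inputs = torch.randn(batch_size, 8)
targets = torch.randn(batch_size, 1)

optimizer.zero_grad()
for start in range(0, batch_size, max_per_pass):
    x = inputs[start:start + max_per_pass]
    y = targets[start:start + max_per_pass]
    # sum-reduced loss so the gradients of all sub-batches simply add up
    loss = torch.nn.functional.mse_loss(model(x), y, reduction="sum")
    loss.backward()

# rescale the accumulated gradients to the mean over the full batch
for p in model.parameters():
    p.grad /= batch_size
optimizer.step()
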
Beispiel #18
0
def upsnet_test():

    pprint.pprint(config)
    logger.info('test config:{}\n'.format(pprint.pformat(config)))

    # create models
    gpus = [int(_) for _ in config.gpus.split(',')]
    test_model = eval(config.symbol)().cuda(device=gpus[0])

    # create data loader
    test_dataset = eval(config.dataset.dataset)(
        image_sets=config.dataset.test_image_set.split('+'),
        flip=False,
        result_path=final_output_path,
        phase='test')
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=config.test.batch_size,
        shuffle=False,
        num_workers=0,
        drop_last=False,
        pin_memory=False,
        collate_fn=test_dataset.collate)

    if args.eval_only:
        results = pickle.load(
            open(
                os.path.join(final_output_path, 'results', 'results_list.pkl'),
                'rb'))
        if config.test.vis_mask:
            test_dataset.vis_all_mask(
                results['all_boxes'], results['all_masks'],
                os.path.join(final_output_path, 'results', 'vis'))
        if config.network.has_rcnn:
            test_dataset.evaluate_boxes(
                results['all_boxes'], os.path.join(final_output_path,
                                                   'results'))
        if config.network.has_mask_head:
            test_dataset.evaluate_masks(
                results['all_boxes'], results['all_masks'],
                os.path.join(final_output_path, 'results'))
        if config.network.has_fcn_head:
            test_dataset.evaluate_ssegs(
                results['all_ssegs'],
                os.path.join(final_output_path, 'results', 'ssegs'))
            # logging.info('combined pano result:')
            # test_dataset.evaluate_panoptic(test_dataset.get_combined_pan_result(results['all_ssegs'], results['all_boxes'], results['all_masks'], stuff_area_limit=config.test.panoptic_stuff_area_limit), os.path.join(final_output_path, 'results', 'pans_combined'))
        if config.network.has_panoptic_head:
            logging.info('unified pano result:')
            test_dataset.evaluate_panoptic(
                test_dataset.get_unified_pan_result(
                    results['all_ssegs'],
                    results['all_panos'],
                    results['all_pano_cls_inds'],
                    stuff_area_limit=config.test.panoptic_stuff_area_limit),
                os.path.join(final_output_path, 'results', 'pans_unified'))
        sys.exit()

    # preparing
    curr_iter = config.test.test_iteration
    if args.weight_path == '':
        test_model.load_state_dict(torch.load(
            os.path.join(config.output_path,
                         os.path.basename(args.cfg).split('.')[0],
                         '_'.join(config.dataset.image_set.split('+')),
                         config.model_prefix + str(curr_iter) + '.pth')),
                                   resume=True)
    else:
        test_model.load_state_dict(torch.load(args.weight_path), resume=True)

    for p in test_model.parameters():
        p.requires_grad = False

    test_model = DataParallel(test_model, device_ids=gpus,
                              gather_output=False).to(gpus[0])

    # start testing
    test_model.eval()

    i_iter = 0
    idx = 0
    test_iter = test_loader.__iter__()
    all_boxes = [[] for _ in range(test_dataset.num_classes)]
    if config.network.has_mask_head:
        all_masks = [[] for _ in range(test_dataset.num_classes)]
    if config.network.has_fcn_head:
        all_ssegs = []
    if config.network.has_panoptic_head:
        all_panos = []
        all_pano_cls_inds = []
        panos = []

    data_timer = Timer()
    net_timer = Timer()
    post_timer = Timer()

    #     while i_iter < len(test_loader):
    while i_iter < 5:  # debug cap; the commented line above would run the full test loader
        data_timer.tic()
        batch = []
        labels = []
        for gpu_id in gpus:
            try:
                data, label, _ = next(test_iter)
                if label is not None:
                    data['roidb'] = label['roidb']
                for k, v in data.items():
                    data[k] = v.pin_memory().to(
                        gpu_id, non_blocking=True) if torch.is_tensor(v) else v
            except StopIteration:
                data = data.copy()
                for k, v in data.items():
                    data[k] = v.pin_memory().to(
                        gpu_id, non_blocking=True) if torch.is_tensor(v) else v
            batch.append((data, None))
            labels.append(label)
            i_iter += 1

        im_infos = [_[0]['im_info'][0] for _ in batch]

        data_time = data_timer.toc()
        if i_iter > 10:
            net_timer.tic()
        with torch.no_grad():
            output = test_model(*batch)
            torch.cuda.synchronize()
            if i_iter > 10:
                net_time = net_timer.toc()
            else:
                net_time = 0
            output = im_detect(output, batch, im_infos)
        post_timer.tic()
        for score, box, mask, cls_idx, im_info in zip(output['scores'],
                                                      output['boxes'],
                                                      output['masks'],
                                                      output['cls_inds'],
                                                      im_infos):
            im_post(all_boxes, all_masks, score, box, mask, cls_idx,
                    test_dataset.num_classes,
                    np.round(im_info[:2] / im_info[2]).astype(np.int32))
            idx += 1
        if config.network.has_fcn_head:
            for i, sseg in enumerate(output['ssegs']):
                sseg = sseg.squeeze(0).astype(
                    'uint8')[:int(im_infos[i][0]), :int(im_infos[i][1])]
                all_ssegs.append(
                    cv2.resize(sseg,
                               None,
                               None,
                               fx=1 / im_infos[i][2],
                               fy=1 / im_infos[i][2],
                               interpolation=cv2.INTER_NEAREST))
        if config.network.has_panoptic_head:
            pano_cls_inds = []
            for i, (pano, cls_ind) in enumerate(
                    zip(output['panos'], output['pano_cls_inds'])):
                pano = pano.squeeze(0).astype(
                    'uint8')[:int(im_infos[i][0]), :int(im_infos[i][1])]
                panos.append(
                    cv2.resize(pano,
                               None,
                               None,
                               fx=1 / im_infos[i][2],
                               fy=1 / im_infos[i][2],
                               interpolation=cv2.INTER_NEAREST))
                pano_cls_inds.append(cls_ind)

            all_panos.extend(panos)
            panos = []
            all_pano_cls_inds.extend(pano_cls_inds)
        post_time = post_timer.toc()
        s = 'Batch %d/%d, data_time:%.3f, net_time:%.3f, post_time:%.3f' % (
            idx, len(test_dataset), data_time, net_time, post_time)
        logging.info(s)

    results = []

    # trim redundant predictions
    for i in range(1, test_dataset.num_classes):
        all_boxes[i] = all_boxes[i][:len(test_loader)]
        if config.network.has_mask_head:
            all_masks[i] = all_masks[i][:len(test_loader)]
    if config.network.has_fcn_head:
        all_ssegs = all_ssegs[:len(test_loader)]
    if config.network.has_panoptic_head:
        all_panos = all_panos[:len(test_loader)]

    os.makedirs(os.path.join(final_output_path, 'results'), exist_ok=True)

    results = {
        'all_boxes':
        all_boxes,
        'all_masks':
        all_masks if config.network.has_mask_head else None,
        'all_ssegs':
        all_ssegs if config.network.has_fcn_head else None,
        'all_panos':
        all_panos if config.network.has_panoptic_head else None,
        'all_pano_cls_inds':
        all_pano_cls_inds if config.network.has_panoptic_head else None,
    }

    with open(os.path.join(final_output_path, 'results', 'results_list.pkl'),
              'wb') as f:
        pickle.dump(results, f, protocol=2)

    if config.test.vis_mask:
        test_dataset.vis_all_mask(
            all_boxes, all_masks,
            os.path.join(final_output_path, 'results', 'vis'))
    else:
        test_dataset.evaluate_boxes(all_boxes,
                                    os.path.join(final_output_path, 'results'))
        if config.network.has_mask_head:
            test_dataset.evaluate_masks(
                all_boxes, all_masks, os.path.join(final_output_path,
                                                   'results'))
        if config.network.has_panoptic_head:
            logging.info('unified pano result:')
            test_dataset.evaluate_panoptic(
                test_dataset.get_unified_pan_result(
                    all_ssegs,
                    all_panos,
                    all_pano_cls_inds,
                    stuff_area_limit=config.test.panoptic_stuff_area_limit),
                os.path.join(final_output_path, 'results', 'pans_unified'))
        if config.network.has_fcn_head:
            test_dataset.evaluate_ssegs(
                all_ssegs, os.path.join(final_output_path, 'results', 'ssegs'))
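
The post-processing above crops each predicted map to the un-padded size stored in im_info and resizes it back to the original resolution with nearest-neighbour interpolation. A stand-alone sketch of that step on dummy data, assuming im_info holds [scaled height, scaled width, scale]:

import cv2
import numpy as np

# dummy class prediction on a padded, 1.5x-scaled input; the original image was 300 x 400
sseg = np.random.randint(0, 19, size=(480, 640), dtype=np.uint8)
im_info = np.array([450.0, 600.0, 1.5])  # [scaled height, scaled width, scale]

cropped = sseg[:int(im_info[0]), :int(im_info[1])]   # drop the padding
restored = cv2.resize(cropped, None, None,
                      fx=1 / im_info[2], fy=1 / im_info[2],
                      interpolation=cv2.INTER_NEAREST)  # nearest keeps labels intact
print(restored.shape)  # (300, 400)
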
    def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None:
        data = inputs.horizontal_concat(outputs)
        data = data.copy()

        # mark datetime column
        times = data.metadata.list_columns_with_semantic_types(
            (
                "https://metadata.datadrivendiscovery.org/types/Time",
                "http://schema.org/DateTime",
            )
        )
        if len(times) != 1:
            raise ValueError(
                f"There are {len(times)} indices marked as datetime values. Please only specify one"
            )
        self._time_column = list(data)[times[0]]

        # if datetime columns are integers, parse as # of days
        if (
                "http://schema.org/Integer"
                in inputs.metadata.query_column(times[0])["semantic_types"]
        ):
            self._integer_time = True
            data[self._time_column] = pd.to_datetime(
                data[self._time_column] - 1, unit="D"
            )
        else:
            data[self._time_column] = pd.to_datetime(
                data[self._time_column], unit="s"
            )

        # sort by time column
        data = data.sort_values(by=[self._time_column])

        # mark key and grp variables
        self.key = data.metadata.get_columns_with_semantic_type(
            "https://metadata.datadrivendiscovery.org/types/PrimaryKey"
        )

        # mark target variables
        self._targets = data.metadata.list_columns_with_semantic_types(
            (
                "https://metadata.datadrivendiscovery.org/types/SuggestedTarget",
                "https://metadata.datadrivendiscovery.org/types/TrueTarget",
                "https://metadata.datadrivendiscovery.org/types/Target",
            )
        )
        self._target_types = [
            "i"
            if "http://schema.org/Integer"
               in data.metadata.query_column(t)["semantic_types"]
            else "c"
            if "https://metadata.datadrivendiscovery.org/types/CategoricalData"
               in data.metadata.query_column(t)["semantic_types"]
            else "f"
            for t in self._targets
        ]
        self._targets = [list(data)[t] for t in self._targets]

        self.target_column = self._targets[0]

        # see if 'GroupingKey' has been marked
        # otherwise fall through to use 'SuggestedGroupingKey'
        grouping_keys = data.metadata.get_columns_with_semantic_type(
            "https://metadata.datadrivendiscovery.org/types/GroupingKey"
        )
        suggested_grouping_keys = data.metadata.get_columns_with_semantic_type(
            "https://metadata.datadrivendiscovery.org/types/SuggestedGroupingKey"
        )
        if len(grouping_keys) == 0:
            grouping_keys = suggested_grouping_keys
            drop_list = []
        else:
            drop_list = suggested_grouping_keys

        grouping_keys_counts = [
            data.iloc[:, key_idx].nunique() for key_idx in grouping_keys
        ]
        grouping_keys = [
            group_key
            for count, group_key in sorted(zip(grouping_keys_counts, grouping_keys))
        ]
        self.filter_idxs = [list(data)[key] for key in grouping_keys]

        # drop index
        data.drop(
            columns=[list(data)[i] for i in drop_list + self.key], inplace=True
        )

        # check whether no grouping keys are labeled
        if len(grouping_keys) == 0:
            concat = pd.concat([data[self._time_column], data[self.target_column]], axis=1)
            concat.columns = ['ds', 'y']
            concat['unique_id'] = 'series1'  # We have only one series
        else:
            # concatenate columns in `grouping_keys` to unique_id column
            concat = data.loc[:, self.filter_idxs].apply(lambda x: ' '.join([str(v) for v in x]), axis=1)
            concat = pd.concat([concat,
                                data[self._time_column],
                                data[self.target_column]],
                               axis=1)
            concat.columns = ['unique_id', 'ds', 'y']

        # Series must be complete in the frequency
        concat = DeepMarkovModelPrimitive._ffill_missing_dates_per_serie(concat, 'D')

        # remove duplicates
        concat = concat.drop_duplicates(['unique_id', 'ds'])

        self._training_inputs = concat
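
The grouping logic above joins the grouping-key columns into a single unique_id and reshapes the frame into the (unique_id, ds, y) layout used downstream. A minimal sketch of that reshaping on a toy frame (all column names here are invented for illustration):

import pandas as pd

df = pd.DataFrame({
    "store": ["A", "A", "B", "B"],
    "item": [1, 1, 2, 2],
    "day": [1, 2, 1, 2],          # integer time column, interpreted as day numbers
    "sales": [10.0, 12.0, 3.0, 4.0],
})

# integer times parsed as a number of days, matching the unit="D" branch above
df["day"] = pd.to_datetime(df["day"] - 1, unit="D")

# concatenate the grouping keys into one series identifier
unique_id = df[["store", "item"]].apply(lambda r: " ".join(str(v) for v in r), axis=1)
long = pd.concat([unique_id, df["day"], df["sales"]], axis=1)
long.columns = ["unique_id", "ds", "y"]
print(long)
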
Beispiel #20
0
    def __data_inverse_vae_sampling(self,
                                    data_row,
                                    num_samples,
                                    dataset="german"):
        """
        New sampling method which makes use of the trained variational autoencoder.

        Args:
            data_row: 1d numpy array, corresponding to a row
            num_samples: size of the neighborhood to learn the linear model
            dataset: Dataset on which the variational autoencoder was trained.

        Returns:
            A tuple (data, inverse), where:
                data: dense num_samples * K matrix, where categorical features
                are encoded with either 0 (not equal to the corresponding value
                in data_row) or 1. The first row is the original instance.
                inverse: same as data, except the categorical features are not
                binary, but categorical (as the original data)

        Raises:
            FileNotFoundError: The VAE model for the given dataset cannot be found and loaded.
        """

        # NEXT STEPS
        # Later: Categorical sampling improvements? https://blog.evjang.com/2016/11/tutorial-categorical-variational.html
        # Or look into the numerical features only?

        import torch.utils.data

        num_cols = data_row.shape[0]
        data = np.zeros((num_samples, num_cols))
        categorical_features = range(num_cols)
        instance_sample = data_row
        scale = self.scaler.scale_
        mean = self.scaler.mean_

        ##############################################
        # Insert VAE Sampling here
        if dataset == "german":
            from train_german_vae import VAE
        elif dataset == "compas":
            from train_compas_vae import VAE
        elif dataset == "cc":
            from train_cc_vae import VAE
        else:
            raise FileNotFoundError(
                "Please state one of the following datasets [german, compas, cc] and make sure"
                "that the respective VAE model exists.")

        device = torch.device(
            "cuda") if torch.cuda.is_available() else torch.device("cpu")
        model = VAE(data_row.shape[0]).to(device)
        model.load_state_dict(
            torch.load(
                "../../experiments/{0}/vae_lime_{0}.pt".format(dataset)))
        model.eval()

        with torch.no_grad():

            sample = torch.randn(num_samples, 30).to(device)

            # Test Idea: Encode data row once, and sample from generated latent space.
            # x = np.asarray(data_row, dtype=np.float32)
            # for i in range(num_samples):
            #    sample, _, _ = model.forward(torch.from_numpy(x).to(device))
            #    data[i] = sample.cpu().numpy().reshape(-1, num_cols)

            # results do not differ from standard random sampling into decode

            sample = model.decode(sample).cpu()
            data = sample.numpy().reshape(num_samples, num_cols)
            # data = [np.round(i, 0) for i in data]
        ##############################################

        # data = self.random_state.normal(
        #    0, 1, num_samples * num_cols).reshape(
        #    num_samples, num_cols)
        if self.sample_around_instance:
            data = data * scale + instance_sample
        else:
            data = data * scale + mean

        # Convert categorical sample columns to 0-1
        # Decide how to handle scaling here. I want to keep the scaling as is.
        # They mainly exploited numerical values. Think about the implications here for our approach.

        categorical_features = self.categorical_features
        first_row = data_row
        data[0] = data_row.copy()
        inverse = data.copy()
        for column in categorical_features:
            # data[:, column] = utils.one_hot_encode(data[:, column])
            data[:, column] = np.round(data[:, column])
            values = self.feature_values[column]
            freqs = self.feature_frequencies[column]

            # inverse_column = self.random_state.choice(values, size=num_samples,
            #                                          replace=True, p=freqs)

            # Here we NEED to copy!
            inverse_column = data[:, column].copy()
            binary_column = (inverse_column == first_row[column]).astype(int)
            binary_column[0] = 1
            inverse_column[0] = data[0, column]
            data[:, column] = binary_column
            inverse[:, column] = inverse_column
        inverse[0] = data_row
        return data, inverse
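
The heart of the sampler above is: draw num_samples latent vectors from a standard normal, push them through the trained VAE decoder, and undo the standardisation with the scaler's scale_ and mean_. A toy sketch of that pipeline with a stand-in linear decoder (the real VAE class and checkpoint path are specific to the original project):

import numpy as np
import torch

latent_dim, num_cols, num_samples = 30, 5, 4

# stand-in for the trained VAE decoder: latent vector -> reconstructed feature row
decoder = torch.nn.Sequential(torch.nn.Linear(latent_dim, num_cols))

scale = np.full(num_cols, 2.0)   # stand-in for self.scaler.scale_
mean = np.full(num_cols, 0.5)    # stand-in for self.scaler.mean_

with torch.no_grad():
    z = torch.randn(num_samples, latent_dim)
    samples = decoder(z).numpy().reshape(num_samples, num_cols)

# undo the standardisation, as the method above does
neighborhood = samples * scale + mean
print(neighborhood.shape)  # (4, 5)
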
    def __init__(self, data):
        self.data = data.copy()
    def __init__(self, root, phase, debug = 1 ):
        file_name = phase
        fn = file_name

        mode = phase

        file_path = os.path.join(root, fn + '.csv')

        x_list = []
        y_list = []

        self.max_y = 0 
        self.min_data_list_size = 4 #TODO 2005 min data list 
            
        f = open( file_path, 'r', encoding='utf-8-sig' ) 
        # skip two lines
        f.readline()
        f.readline()

        # In the train dataset (2020-01-01 ~ 2020-05-01), March 30 was not collected

        idx = 0 
        max_idx = 0  #TODO 1000 max idx

        min_data_list_size = self.min_data_list_size

        data_list = [ ]

        ymdh_prev = None  

        data_distcontinuous_cnt = 0 

        _999_encountered = 1  # -999 means prediction data

        if mode == "test" :
            _999_encountered = 0 
        pass

        # read the file one line at a time
        for line in f.readlines():
            if max_idx and max_idx == idx :
                log.info( "**** max idx encounterd. %s" % max_idx )
                break
            pass
            
            row = np.array( line.strip().split(',') )

            ymd = int( row[0] )
            hour = int( row[1] )

            ymdh = datetime.datetime.strptime( "%04d %02d" % (ymd, hour), '%Y%m%d %H')

            data = None

            data = np.asfarray( row[2:], float )

            max_y = max( data )

            if max_y > self.max_y :
                self.max_y = max_y
            pass

            data_list_clear = 0 

            if ymdh_prev is None :
                pass
            else : 
                duration = ymdh - ymdh_prev 
                
                debug and log.info( "duration secons = %s" % duration.seconds )
                
                if duration.seconds != 3600 :
                    data_list_clear = 1 
                pass
            pass

            if data_list_clear : 
                # If the time gap is not exactly one hour, rebuild the data list.
                data_list.clear()

                data_distcontinuous_cnt += 1 

                if debug : 
                    log.info( "[%s][%04d] %s" % (fn, idx, LINE) )
                    log.info( "[%s][%04d] hour is not continuous. ymdh = %s, ymdh_prev = %s" % (fn, idx, ymdh, ymdh_prev ) )
                    log.info( "[%s][%04d] %s" % (fn, idx, LINE) )
                pass
            pass

            debug and log.info( "[%s][%04d] %08d, %02d, input : %s" % (fn, idx, ymd, hour, str(data)) )

            if not _999_encountered :
                if ( -999 in data ) or ( -999.0 in data ) :
                    _999_encountered = 1
                    print( LINE )
                    log.info( "[%s][%04d] _999_encountered = %s" % (fn, idx, _999_encountered ) )
                    print( LINE )
                pass
            pass

            if len( data_list ) < min_data_list_size :
                data_list.append( data.copy() ) 
            else :
                if _999_encountered : 
                    # x data generation
                    x_data = []

                    for r in data_list :
                        for d in r : 
                            x_data.append( d )
                        pass
                    pass
                    
                    x_data = np.array( [ x_data ] )
                    x_list.append( x_data )

                    # y data generation
                    y_data = np.array( data.copy() )
                    y_list.append( y_data )
                pass

                data_list.pop( 0 )
                data_list.append( data.copy() ) 
            pass                

            ymdh_prev = ymdh
            
            idx += 1                
        pass # // end of reading line by line
        
        self.x_list = np.array( x_list )
        self.y_list = np.array( y_list )

        debug and log.info( "[%s] data_distcontinuous_cnt : %d" % ( fn, data_distcontinuous_cnt ) )
        log.info( "[%s] max_y = %s" % (fn, self.max_y ) )
Beispiel #23
0
# We use MCAR (missing completely at random) as the driving process: each cell has a 0.1 probability of being set to NaN.
import random

import numpy as np
import pandas as pd

data = pd.read_csv('Tab.delimited.Cleaned.dataset.WITH.variable.labels.csv',
                   sep='\t',
                   engine='python')
file = open('categorical.txt')
labels = []
for line in file:
    word = line.rstrip('\n')
    labels.append(word)

data = data.loc[:, labels]  # keep only the selected categorical columns
data = data.replace(' ', np.nan)
data = data.dropna()

prob_missing = 0.1
data_incomplete = data.copy()
ix = [(row, col) for row in range(data.shape[0])
      for col in range(data.shape[1])]
for row, col in random.sample(ix, int(round(prob_missing * len(ix)))):
    data_incomplete.iat[row, col] = np.nan
missing_encoded = pd.get_dummies(data_incomplete)
complete_encoded = pd.get_dummies(data)
print(complete_encoded.info(verbose=True))
for col in data.columns:
    missing_cols = missing_encoded.columns.str.startswith(str(col) + "_")
    missing_encoded.loc[data_incomplete[col].isnull(), missing_cols] = np.nan
print(missing_encoded.shape)
print(missing_encoded.values)
print(complete_encoded.shape)
print(complete_encoded.values.astype('float'))
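
The loop above removes exactly 10% of the cells by sampling index pairs without replacement. A roughly equivalent vectorised sketch masks each cell independently with probability 0.1, so the realised fraction only hovers around 0.1:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
frame = pd.DataFrame(rng.integers(0, 5, size=(100, 4)), columns=list("abcd"))

prob_missing = 0.1
mask = rng.random(frame.shape) < prob_missing   # True where a cell goes missing
frame_incomplete = frame.mask(mask)             # masked cells become NaN
print(frame_incomplete.isna().mean().mean())    # close to 0.1 on average
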
Beispiel #24
0
def createMask(
        data):  # Build a mask of the missing entries and fill them with -1
    miss_data = data.copy()
    missing_mask = np.isnan(data)  #bool matrix of mask
    miss_data[missing_mask] = -1.0  # fill data with -1
    return miss_data, missing_mask
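
A quick usage check of createMask as defined above (numpy is assumed to be imported as np in the surrounding module):

import numpy as np

sample = np.array([[1.0, np.nan],
                   [np.nan, 4.0]])
filled, mask = createMask(sample)
print(filled)  # NaNs replaced by -1.0; the input array itself is left untouched
print(mask)    # boolean mask of the originally missing positions
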
    def __init__(self, root, phase='train'):
        self.root = root
        self.phase = phase
        self.labels = {}
        #self.data_index_pool = [0, 1, 5, 6, 7, 19, 24, 25, 29, 31]
        #self.label_index_pool = [2, 3, 4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 21, 22, 23, 26, 27, 28, 30, 32, 33, 34]

        self.label_path = os.path.join(self.root, self.phase + '.csv')
        with open(self.label_path, 'r', encoding='utf-8-sig') as f:
            # skip two lines
            f.readline()
            f.readline()

            input_data = []
            output_data = []

            # 20200330
            # One day of the sample train dataset (2020-01-01 ~ 2020-05-01), March 30, was not collected due to a machine error

            idx = 0
            max_idx = 0  #TODO 9000 max idx

            data_prev = []
            hour_prev = -1

            for line in f.readlines():
                values = line.strip().split(',')

                date = int(values[0])
                hour = int(values[1])

                if max_idx and max_idx == idx:
                    log.info("**** max idx encounterd. %s" % max_idx)
                    break
                pass

                if 20200330 == date and phase == 'train':
                    log.info("**** data encountered: %s, phase = %s \n" %
                             (date, phase))
                    break
                pass

                data = values[2:]
                data = np.asfarray(data, np.float32)

                if phase != "train":
                    input = data.copy()
                    output = data.copy()
                    input_data.append(input)
                    output_data.append(output)

                    if 0:
                        log.info(
                            "[%04d] date: %08d, hour: %02d, data len = %d ================================="
                            % (idx, date, hour, len(input)))
                        log.info("[%04d] %08d, %02d, input  : %s" %
                                 (idx, date, hour, input))
                        log.info("[%04d] %08d, %02d, output : %s" %
                                 (idx, date, hour, output))
                        pass
                elif 0 == idx:
                    log.info("data_prev is null.")
                else:
                    input = data_prev.copy()
                    output = data.copy()

                    input_data.append(input)
                    output_data.append(output)

                    if 1:
                        log.info(
                            "[%04d] date: %08d, hour: %02d, data len = %d ================================="
                            % (idx, date, hour, len(input)))
                        log.info("[%04d] %08d, %02d, input  : %s" %
                                 (idx, date, hour, input))
                        log.info("[%04d] %08d, %02d, output : %s" %
                                 (idx, date, hour, output))
                    pass
                pass

                data_prev = data
                idx += 1
            pass

            global is_corrcoeff_saved

            if not is_corrcoeff_saved:
                #TODO 0020 corrcoef matrix
                covMatrix = np.corrcoef(np.transpose(input_data), bias=True)
                print("===== corrcoef matrix ======")
                print(covMatrix)
                np.savetxt("corrcoef.csv", covMatrix, delimiter=",")

                is_corrcoeff_saved = 1
            pass

            if 0:
                #TODO 0006 force the process to exit
                log.info("sys.exit(0)")
                import sys
                sys.exit(0)
            pass
        pass

        self.labels['input'] = input_data
        self.labels['output'] = output_data
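
The corrcoef step above computes the feature-to-feature correlation matrix of the stacked input rows and writes it to CSV. A stand-alone sketch of the same step on random data (the deprecated bias argument is dropped here, since it has no effect):

import numpy as np

inputs = np.random.rand(200, 6)            # 200 samples, 6 features
corr = np.corrcoef(np.transpose(inputs))   # 6 x 6 correlation matrix
np.savetxt("corrcoef_example.csv", corr, delimiter=",")
print(corr.shape)
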
Beispiel #26
0
    def __data_inverse(self, data_row, num_samples):
        """Generates a neighborhood around a prediction.

        For numerical features, perturb them by sampling from a Normal(0,1) and
        doing the inverse operation of mean-centering and scaling, according to
        the means and stds in the training data. For categorical features,
        perturb by sampling according to the training distribution, and making
        a binary feature that is 1 when the value is the same as the instance
        being explained.

        Args:
            data_row: 1d numpy array, corresponding to a row
            num_samples: size of the neighborhood to learn the linear model

        Returns:
            A tuple (data, inverse), where:
                data: dense num_samples * K matrix, where categorical features
                are encoded with either 0 (not equal to the corresponding value
                in data_row) or 1. The first row is the original instance.
                inverse: same as data, except the categorical features are not
                binary, but categorical (as the original data)
        """
        is_sparse = sp.sparse.issparse(data_row)
        if is_sparse:
            num_cols = data_row.shape[1]
            data = sp.sparse.csr_matrix((num_samples, num_cols),
                                        dtype=data_row.dtype)
        else:
            num_cols = data_row.shape[0]
            data = np.zeros((num_samples, num_cols))
        categorical_features = range(num_cols)
        if self.discretizer is None:
            instance_sample = data_row
            scale = self.scaler.scale_
            mean = self.scaler.mean_
            if is_sparse:
                # Perturb only the non-zero values
                non_zero_indexes = data_row.nonzero()[1]
                num_cols = len(non_zero_indexes)
                instance_sample = data_row[:, non_zero_indexes]
                scale = scale[non_zero_indexes]
                mean = mean[non_zero_indexes]

            data = self.random_state.normal(0, 1,
                                            num_samples * num_cols).reshape(
                                                num_samples, num_cols)
            if self.sample_around_instance:
                data = data * scale + instance_sample
            else:
                data = data * scale + mean
            if is_sparse:
                if num_cols == 0:
                    data = sp.sparse.csr_matrix(
                        (num_samples, data_row.shape[1]), dtype=data_row.dtype)
                else:
                    indexes = np.tile(non_zero_indexes, num_samples)
                    indptr = np.array(
                        range(0,
                              len(non_zero_indexes) * (num_samples + 1),
                              len(non_zero_indexes)))
                    data_1d_shape = data.shape[0] * data.shape[1]
                    data_1d = data.reshape(data_1d_shape)
                    data = sp.sparse.csr_matrix(
                        (data_1d, indexes, indptr),
                        shape=(num_samples, data_row.shape[1]))
            categorical_features = self.categorical_features
            first_row = data_row
        else:
            first_row = self.discretizer.discretize(data_row)
        data[0] = data_row.copy()
        inverse = data.copy()
        for column in categorical_features:
            values = self.feature_values[column]
            freqs = self.feature_frequencies[column]

            inverse_column = self.random_state.choice(values,
                                                      size=num_samples,
                                                      replace=True,
                                                      p=freqs)
            binary_column = (inverse_column == first_row[column]).astype(int)
            binary_column[0] = 1
            inverse_column[0] = data[0, column]
            data[:, column] = binary_column
            inverse[:, column] = inverse_column
        if self.discretizer is not None:
            inverse[1:] = self.discretizer.undiscretize(inverse[1:])
        inverse[0] = data_row
        return data, inverse
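
For the non-discretized numeric case above, the neighbourhood is simply standard-normal noise rescaled by the training scaler and shifted to the mean (or to the instance itself when sample_around_instance is set). A small sketch of that perturbation with a hypothetical scaler fitted on toy data:

import numpy as np
from sklearn.preprocessing import StandardScaler

rng = np.random.RandomState(0)
train = rng.normal(loc=[5.0, -2.0], scale=[2.0, 0.5], size=(500, 2))
scaler = StandardScaler().fit(train)

data_row = np.array([4.0, -1.5])
num_samples = 6

noise = rng.normal(0, 1, size=(num_samples, 2))
neighborhood = noise * scaler.scale_ + scaler.mean_   # sample around the training mean
neighborhood[0] = data_row                            # first row is the original instance
print(neighborhood)
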
Beispiel #27
0
                                (coordy + yd) <= 0) | (
                                    (coordx + xd) >= data.shape[2]) | (
                                        (coordx + xd) <= 0):
                        continue

                    temp = int(redu_range * 10)
                    redu = random.randrange(0, temp, 1)
                    print(coordz + zd, coordy + yd, coordx + xd)
                    rand = random.randrange(0, 10, 1)
                    if (rand == 7) | (rand == 6) | (rand == 9) | (rand == 8):
                        data[coordz + zd, coordy + yd,
                             coordx + xd] = (data[coordz + zd, coordy + yd,
                                                  coordx + xd]) * 0.55
                        continue
                    data[coordz + zd, coordy + yd,
                         coordx + xd] = (data[coordz + zd, coordy + yd,
                                              coordx + xd]) * 0.4


with h5py.File('image-2.h5', 'r') as raw:
    data = (raw['image'][()])  # .a
print(data)

skimage.io.imsave('test2.tif', data.astype('uint8'))

data1 = data.copy()
print(data1 == data)
abs_gap_aug(data1, 5, 0.5)
print("!!!!!!!!!!!!!!!")
skimage.io.imsave('test1.tif', data1.astype('uint8'))
print(data1 == data)
Beispiel #28
0
    def __init__(self, data):
        self.data = data.copy().astype(np.float32)
Beispiel #29
0
def main():
    global i
    global loss_sum
    global running
    parser = ArgumentParser()
    parser.add_argument("-bit_w", type=int, default=8, help="Bit vector length for copy task")
    parser.add_argument("-block_w", type=int, default=3, help="Block width to associative recall task")
    parser.add_argument("-len", type=str, default="4", help="Sequence length for copy task", parser=lambda x: [int(a) for a in x.split("-")])
    parser.add_argument("-repeat", type=str, default="1", help="Sequence length for copy task", parser=lambda x: [int(a) for a in x.split("-")])
    parser.add_argument("-batch_size", type=int, default=16, help="Sequence length for copy task")
    parser.add_argument("-n_subbatch", type=str, default="auto", help="Average this much forward passes to a backward pass")
    parser.add_argument("-max_input_count_per_batch", type=int, default=6000, help="Max batch_size*len that can fit into memory")
    parser.add_argument("-lr", type=float, default=0.0001, help="Learning rate")
    parser.add_argument("-wd", type=float, default=1e-5, help="Weight decay")
    parser.add_argument("-optimizer", type=str, default="rmsprop", help="Optimizer algorithm")
    parser.add_argument("-name", type=str, help="Save training to this directory")
    parser.add_argument("-preview_interval", type=int, default=10, help="Show preview every nth iteration")
    parser.add_argument("-info_interval", type=int, default=10, help="Show info every nth iteration")
    parser.add_argument("-save_interval", type=int, default=500, help="Save network every nth iteration")
    parser.add_argument("-masked_lookup", type=bool, default=1, help="Enable masking in content lookups")
    parser.add_argument("-visport", type=int, default=-1, help="Port to run Visdom server on. -1 to disable")
    parser.add_argument("-gpu", default="auto", type=str, help="Run on this GPU.")
    parser.add_argument("-debug", type=bool, default=1, help="Enable debugging")
    parser.add_argument("-task", type=str, default="copy", help="Task to learn")
    parser.add_argument("-mem_count", type=int, default=16, help="Number of memory cells")
    parser.add_argument("-data_word_size", type=int, default=128, help="Memory word size")
    parser.add_argument("-n_read_heads", type=int, default=1, help="Number of read heads")
    parser.add_argument("-layer_sizes", type=str, default="256", help="Controller layer sizes. Separate with ,. For example 512,256,256", parser=lambda x: [int(y) for y in x.split(",") if y])
    parser.add_argument("-debug_log", type=bool, default=0, help="Enable debug log")
    parser.add_argument("-controller_type", type=str, default="lstm", help="Controller type: lstm or linear")
    parser.add_argument("-lstm_use_all_outputs", type=bool, default=1, help="Use all LSTM outputs as controller output vs use only the last layer")
    parser.add_argument("-momentum", type=float, default=0.9, help="Momentum for optimizer")
    parser.add_argument("-embedding_size", type=int, default=256, help="Size of word embedding for NLP tasks")
    parser.add_argument("-test_interval", type=int, default=10000, help="Run test in this interval")
    parser.add_argument("-dealloc_content", type=bool, default=1, help="Deallocate memory content, unlike DNC, which leaves it unchanged, just decreases the usage counter, causing problems with lookup")
    parser.add_argument("-sharpness_control", type=bool, default=1, help="Distribution sharpness control for forward and backward links")
    parser.add_argument("-think_steps", type=int, default=0, help="Iddle steps before requiring the answer (for bAbi)")
    parser.add_argument("-dump_profile", type=str, save=False)
    parser.add_argument("-test_on_start", default="0", save=False)
    parser.add_argument("-dump_heatmaps", default=False, save=False)
    parser.add_argument("-test_batch_size", default=16)
    parser.add_argument("-mask_min", default=0.0)
    parser.add_argument("-load", type=str, save=False)
    parser.add_argument("-dataset_path", type=str, default="none", parser=ArgumentParser.str_or_none(), help="Specify babi path manually")
    parser.add_argument("-babi_train_tasks", type=str, default="none", parser=ArgumentParser.list_or_none(type=str), help="babi task list to use for training")
    parser.add_argument("-babi_test_tasks", type=str, default="none", parser=ArgumentParser.list_or_none(type=str), help="babi task list to use for testing")
    parser.add_argument("-babi_train_sets", type=str, default="train", parser=ArgumentParser.list_or_none(type=str), help="babi train sets to use")
    parser.add_argument("-babi_test_sets", type=str, default="test", parser=ArgumentParser.list_or_none(type=str), help="babi test sets to use")
    parser.add_argument("-noargsave", type=bool, default=False, help="Do not save modified arguments", save=False)
    parser.add_argument("-demo", type=bool, default=False, help="Do a single step with fixed seed", save=False)
    parser.add_argument("-exit_after", type=int, help="Exit after this amount of steps. Useful for debugging.", save=False)
    parser.add_argument("-grad_clip", type=float, default=10.0, help="Max gradient norm")
    parser.add_argument("-clip_controller", type=float, default=20.0, help="Max gradient norm")
    parser.add_argument("-print_test", default=False, save=False)

    parser.add_profile([
        ArgumentParser.Profile("babi", {
            "preview_interval": 10,
            "save_interval": 500,
            "task": "babi",
            "mem_count": 256,
            "data_word_size": 64,
            "n_read_heads": 4,
            "layer_sizes": "256",
            "controller_type": "lstm",
            "lstm_use_all_outputs": True,
            "momentum": 0.9,
            "embedding_size": 128,
            "test_interval": 5000,
            "think_steps": 3,
            "batch_size": 2
        }, include=["dnc-msd"]),

        ArgumentParser.Profile("repeat_copy", {
            "bit_w": 8,
            "repeat": "1-8",
            "len": "2-14",
            "task": "copy",
            "think_steps": 1,
            "preview_interval": 10,
            "info_interval": 10,
            "save_interval": 100,
            "data_word_size": 16,
            "layer_sizes": "32",
            "n_subbatch": 1,
            "controller_type": "lstm",
        }),

        ArgumentParser.Profile("repeat_copy_simple", {
            "repeat": "1-3",
        }, include="repeat_copy"),

        ArgumentParser.Profile("dnc", {
            "masked_lookup": False,
            "sharpness_control": False,
            "dealloc_content": False
        }),

        ArgumentParser.Profile("dnc-m", {
            "masked_lookup": True,
            "sharpness_control": False,
            "dealloc_content": False
        }),

        ArgumentParser.Profile("dnc-s", {
            "masked_lookup": False,
            "sharpness_control": True,
            "dealloc_content": False
        }),

        ArgumentParser.Profile("dnc-d", {
            "masked_lookup": False,
            "sharpness_control": False,
            "dealloc_content": True
        }),

        ArgumentParser.Profile("dnc-md", {
            "masked_lookup": True,
            "sharpness_control": False,
            "dealloc_content": True
        }),

        ArgumentParser.Profile("dnc-ms", {
            "masked_lookup": True,
            "sharpness_control": True,
            "dealloc_content": False
        }),

        ArgumentParser.Profile("dnc-sd", {
            "masked_lookup": False,
            "sharpness_control": True,
            "dealloc_content": True
        }),

        ArgumentParser.Profile("dnc-msd", {
            "masked_lookup": True,
            "sharpness_control": True,
            "dealloc_content": True
        }),

        ArgumentParser.Profile("keyvalue", {
            "repeat": "1",
            "len": "2-16",
            "mem_count": 16,
            "task": "keyvalue",
            "think_steps": 1,
            "preview_interval": 10,
            "info_interval": 10,
            "data_word_size": 32,
            "bit_w": 12,
            "save_interval": 1000,
            "layer_sizes": "32"
        }),

        ArgumentParser.Profile("keyvalue2way", {
            "task": "keyvalue2way",
        }, include="keyvalue"),

        ArgumentParser.Profile("associative_recall",{
            "task": "recall",
            "bit_w": 8,
            "len": "2-16",
            "mem_count": 64,
            "data_word_size": 32,
            "n_read_heads": 1,
            "layer_sizes": "128",
            "controller_type": "lstm",
            "lstm_use_all_outputs": 1,
            "think_steps": 1,
            "mask_min": 0.1,
            "info_interval": 10,
            "save_interval": 1000,
            "preview_interval": 10,
            "n_subbatch": 1,
        })
    ])

    opt = parser.parse()
    assert opt.name is not None, "Training dir (-name parameter) not given"
    opt = parser.sync(os.path.join(opt.name, "args.json"), save=not opt.noargsave)

    if opt.demo:
        Seed.fix()

    os.makedirs(os.path.join(opt.name,"save"), exist_ok=True)
    os.makedirs(os.path.join(opt.name,"preview"), exist_ok=True)

    gpu_allocator.use_gpu(opt.gpu)

    debug.enableDebug = opt.debug_log

    if opt.visport>0:
        Visdom.start(opt.visport)

    Visdom.Text("Name").set(opt.name)

    class LengthHackSampler:
        def __init__(self, batch_size, length):
            self.length = length
            self.batch_size = batch_size

        def __iter__(self):
            while True:
                len = self.length() if callable(self.length) else self.length
                yield [len] * self.batch_size

        def __len__(self):
            return 0x7FFFFFFF

    embedding = None
    test_set = None
    curriculum = None
    loader_reset = False
    if opt.task=="copy":
        dataset = CopyData(bit_w=opt.bit_w)
        in_size = opt.bit_w + 1
        out_size = in_size
    elif opt.task=="recall":
        dataset = AssociativeRecall(bit_w=opt.bit_w, block_w=opt.block_w)
        in_size = opt.bit_w + 2
        out_size = in_size
    elif opt.task=="keyvalue":
        assert opt.bit_w % 2==0, "Key-value datasets works only with even bit_w"
        dataset = KeyValue(bit_w=opt.bit_w)
        in_size = opt.bit_w + 1
        out_size = opt.bit_w//2
    elif opt.task=="keyvalue2way":
        assert opt.bit_w % 2==0, "Key-value datasets works only with even bit_w"
        dataset = KeyValue2Way(bit_w=opt.bit_w)
        in_size = opt.bit_w + 2
        out_size = opt.bit_w//2
    elif opt.task=="babi":
        dataset = bAbiDataset(think_steps=opt.think_steps, dir_name=opt.dataset_path)
        test_set = bAbiDataset(think_steps=opt.think_steps, dir_name=opt.dataset_path, name="test")
        dataset.use(opt.babi_train_tasks, opt.babi_train_sets)
        in_size = opt.embedding_size
        print("bAbi: loaded total of %d sequences." % len(dataset))
        test_set.use(opt.babi_test_tasks, opt.babi_test_sets)
        out_size = len(dataset.vocabulary)
        print("bAbi: using %d sequences for training, %d for testing" % (len(dataset), len(test_set)))
    else:
        assert False, "Invalid task: %s" % opt.task

    if opt.task in ["babi"]:
        data_loader = torch.utils.data.DataLoader(dataset, batch_size=opt.batch_size, num_workers=4, pin_memory=True, shuffle=True, collate_fn=MetaCollate())
        test_loader = torch.utils.data.DataLoader(test_set, batch_size=opt.test_batch_size, num_workers=opt.test_batch_size, pin_memory=True, shuffle=False, collate_fn=MetaCollate()) if test_set is not None else None
    else:
        dataset = BitmapTaskRepeater(dataset)
        data_loader = torch.utils.data.DataLoader(dataset, batch_sampler=LengthHackSampler(opt.batch_size, BitmapTaskRepeater.key_sampler(opt.len, opt.repeat)), num_workers=1, pin_memory=True)

    if opt.controller_type == "lstm":
        controller_constructor = functools.partial(LSTMController, out_from_all_layers=opt.lstm_use_all_outputs)
    elif opt.controller_type == "linear":
        controller_constructor = FeedforwardController
    else:
        assert False, "Invalid controller: %s" % opt.controller_type

    model = DNC(in_size, out_size, opt.data_word_size, opt.mem_count, opt.n_read_heads, controller_constructor(opt.layer_sizes),
                batch_first=True, mask=opt.masked_lookup, dealloc_content=opt.dealloc_content,
                link_sharpness_control=opt.sharpness_control,
                mask_min=opt.mask_min, clip_controller=opt.clip_controller)

    params = [
        {'params': [p for n, p in model.named_parameters() if not n.endswith(".bias")]},
        {'params': [p for n, p in model.named_parameters() if n.endswith(".bias")], 'weight_decay': 0}
    ]

    device = torch.device('cuda') if opt.gpu!="none" else torch.device("cpu")
    print("DEVICE: ", device)

    if isinstance(dataset, NLPTask):
        embedding = torch.nn.Embedding(len(dataset.vocabulary), opt.embedding_size).to(device)
        params.append({'params': embedding.parameters(), 'weight_decay': 0})

    if opt.optimizer=="sgd":
        optimizer = torch.optim.SGD(params, lr=opt.lr, weight_decay=opt.wd, momentum=opt.momentum)
    elif opt.optimizer=="adam":
        optimizer = torch.optim.Adam(params, lr=opt.lr, weight_decay=opt.wd)
    elif opt.optimizer == "rmsprop":
        optimizer = torch.optim.RMSprop(params, lr=opt.lr, weight_decay=opt.wd, momentum=opt.momentum, eps=1e-10)
    else:
        assert "Invalid optimizer: %s" % opt.optimizer

    n_params = sum([sum([t.numel() for t in d['params']]) for d in params])
    print("Number of parameters: %d" % n_params)

    model = model.to(device)
    if embedding is not None and hasattr(embedding, "to"):
        embedding = embedding.to(device)

    i=0
    loss_sum = 0

    loss_plot = Visdom.Plot2D("loss", store_interval=opt.info_interval, xlabel="iterations", ylabel="loss")

    if curriculum is not None:
        curriculum_plot = Visdom.Plot2D("curriculum lesson" +
                                    (" (last %d)" % (curriculum.n_lessons-1) if curriculum.n_lessons is not None else ""),
                                    xlabel="iterations", ylabel="lesson")
        curriculum_accuracy = Visdom.Plot2D("curriculum accuracy", xlabel="iterations", ylabel="accuracy")

    saver = Saver(os.path.join(opt.name, "save"), short_interval=opt.save_interval)
    saver.register("model", StateSaver(model))
    saver.register("optimizer", StateSaver(optimizer))
    saver.register("i", GlobalVarSaver("i"))
    saver.register("loss_sum", GlobalVarSaver("loss_sum"))
    saver.register("loss_plot", StateSaver(loss_plot))
    saver.register("dataset", StateSaver(dataset))
    if test_set:
        saver.register("test_set", StateSaver(test_set))

    if curriculum is not None:
        saver.register("curriculum", StateSaver(curriculum))
        saver.register("curriculum_plot", StateSaver(curriculum_plot))
        saver.register("curriculum_accuracy", StateSaver(curriculum_accuracy))

    if isinstance(dataset, NLPTask):
        saver.register("word_embeddings", StateSaver(embedding))
    elif embedding is not None:
        saver.register("embeddings", StateSaver(embedding))

    if not saver.load(opt.load):
        model.reset_parameters()
        if embedding is not None:
            embedding.reset_parameters()

    visualizers = {}

    debug_schemas={
        "read_head" : {
            "list_dim" : 2
        },
        "temporal_links/forward_dists" : {
            "list_dim" : 2
        },
        "temporal_links/backward_dists" : {
            "list_dim" : 2
        }
    }

    def plot_debug(debug, prefix="", schema={}):
        if debug is None:
            return

        for k, v in debug.items():
            curr_name = prefix+k
            if curr_name in debug_schemas:
                curr_schema = schema.copy()
                curr_schema.update(debug_schemas[curr_name])
            else:
                curr_schema = schema

            if isinstance(v, dict):
                plot_debug(v, curr_name+"/", curr_schema)
                continue

            data = v[0]

            if curr_schema.get("list_dim",-1) > 0:
                if data.ndim != 3:
                    print("WARNING: unknown data shape for array display: %s, tensor %s" % (data.shape, curr_name))
                    continue

                n_steps = data.shape[curr_schema["list_dim"]-1]
                if curr_name not in visualizers:
                    visualizers[curr_name] = [Visdom.Heatmap(curr_name+"_%d" % i, dumpdir=os.path.join(opt.name, "preview") if opt.dump_heatmaps else None) for i in range(n_steps)]

                for i in range(n_steps):
                    visualizers[curr_name][i].draw(index_by_dim(data, curr_schema["list_dim"]-1, i))
            else:
                if data.ndim != 2:
                    print("WARNING: unknown data shape for simple display: %s, tensor %s" % (data.shape, curr_name))
                    continue

                if curr_name not in visualizers:
                    visualizers[curr_name] = Visdom.Heatmap(curr_name, dumpdir=os.path.join(opt.name, "preview") if opt.dump_heatmaps else None)

                visualizers[curr_name].draw(data)


    def run_model(input, debug=None):
        if isinstance(dataset, NLPTask):
            input = embedding(input["input"])
        else:
            input = input["input"] * 2.0 - 1.0

        return model(input, debug=debug)

    def multiply_grads(params, mul):
        if mul==1:
            return

        for pa in params:
            for p in pa["params"]:
                p.grad.data *= mul

    def test():
        if test_set is None:
            return

        print("TESTING...")
        start_time=time.time()
        t = test_set.start_test()
        with torch.no_grad():
            for data in tqdm(test_loader):
                data = {k: v.to(device) if torch.is_tensor(v) else v for k, v in data.items()}
                if hasattr(dataset, "prepare"):
                    data = dataset.prepare(data)

                net_out = run_model(data)
                test_set.verify_result(t, data, net_out)

        test_set.show_test_results(i, t)
        print("Test done in %gs" % (time.time() - start_time))

    if opt.test_on_start.lower() in ["on", "1", "true", "quit"]:
        test()
        if opt.test_on_start.lower() == "quit":
            saver.write(i)
            sys.exit(-1)

    if opt.print_test:
        model.eval()
        total = 0
        correct = 0
        with torch.no_grad():
            for data in tqdm(test_loader):
                if not running:
                    return

                data = {k: v.to(device) if torch.is_tensor(v) else v for k, v in data.items()}
                if hasattr(test_set, "prepare"):
                    data = test_set.prepare(data)

                net_out = run_model(data)

                c,t = test_set.curriculum_measure(net_out, data["output"])
                total += t
                correct += c

        print("Test result: %2.f%% (%d out of %d correct)" % (100.0*correct/total, correct, total))
        model.train()
        return

    iter_start_time = time.time() if i % opt.info_interval == 0 else None
    data_load_total_time = 0

    start_i = i

    if opt.dump_profile:
        profiler = torch.autograd.profiler.profile(use_cuda=True)


    if opt.dump_heatmaps:
        dataset.set_dump_dir(os.path.join(opt.name, "preview"))

    @preview()
    def do_visualize(raw_data, output, pos_map, debug):
        if pos_map is not None:
            output = embedding.backmap_output(output, pos_map, raw_data["output"].shape[1])
        dataset.visualize_preview(raw_data, output)

        if debug is not None:
            plot_debug(debug)

    preview_timer=OnceEvery(opt.preview_interval)

    pos_map = None
    start_iter = i

    if curriculum is not None:
        curriculum.init()

    while running:
        data_load_timer = time.time()
        for data in data_loader:
            if not running:
                break

            if loader_reset:
                print("Loader reset requested. Resetting...")
                loader_reset = False
                if curriculum is not None:
                    curriculum.lesson_started()
                break

            if opt.dump_profile:
                if i==start_i+1:
                    print("Starting profiler")
                    profiler.__enter__()
                elif i==start_i+5+1:
                    print("Stopping profiler")
                    profiler.__exit__(None, None, None)
                    print("Average stats")
                    print(profiler.key_averages().table("cpu_time_total"))
                    print("Writing trace to file")
                    profiler.export_chrome_trace(opt.dump_profile)
                    print("Done.")
                    sys.exit(0)
                else:
                    print("Step %d out of 5" % (i-start_i))

            debug.dbg_print("-------------------------------------")
            raw_data = data

            data = {k: v.to(device) if torch.is_tensor(v) else v for k,v in data.items()}
            if hasattr(dataset, "prepare"):
                data = dataset.prepare(data)

            data_load_total_time += time.time() - data_load_timer

            need_preview = preview_timer()
            debug_data = {} if opt.debug and need_preview else None


            optimizer.zero_grad()

            if opt.n_subbatch=="auto":
                n_subbatch = math.ceil(data["input"].numel() / opt.max_input_count_per_batch)
            else:
                n_subbatch = int(opt.n_subbatch)

            real_batch = max(math.floor(opt.batch_size/n_subbatch),1)
            n_subbatch = math.ceil(opt.batch_size/real_batch)
            remaining_batch = opt.batch_size % real_batch

            for subbatch in range(n_subbatch):
                if not running:
                    break
                input = data["input"]
                target = data["output"]

                if n_subbatch!=1:
                    input = input[subbatch * real_batch: (subbatch + 1) * real_batch]
                    target = target[subbatch * real_batch:(subbatch + 1) * real_batch]

                f2 = data.copy()
                f2["input"] = input
                output = run_model(f2, debug=debug_data if subbatch==n_subbatch-1 else None)
                l = dataset.loss(output, target)
                debug.nan_check(l, force=True)
                l.backward()

                if curriculum is not None:
                    curriculum.update(*dataset.curriculum_measure(output, target))

                if remaining_batch!=0 and subbatch == n_subbatch-2:
                    multiply_grads(params, real_batch/remaining_batch)

            if n_subbatch!=1:
                if remaining_batch==0:
                    multiply_grads(params, 1/n_subbatch)
                else:
                    multiply_grads(params, remaining_batch / opt.batch_size)

            for p in params:
                torch.nn.utils.clip_grad_norm_(p["params"], opt.grad_clip)

            optimizer.step()

            i += 1

            curr_loss = l.data.item()
            loss_plot.add_point(i, curr_loss)

            loss_sum += curr_loss


            if i % opt.info_interval == 0:
                tim = time.time()
                loss_avg = loss_sum / opt.info_interval

                if curriculum is not None:
                    curriculum_accuracy.add_point(i, curriculum.get_accuracy())
                    curriculum_plot.add_point(i, curriculum.step)

                message = "Iteration %d, loss: %.4f" % (i, loss_avg)
                if iter_start_time is not None:
                    message += " (%.2f ms/iter, load time %.2g ms/iter, visport: %s)" % (
                                (tim - iter_start_time) / opt.info_interval * 1000.0,
                                data_load_total_time / opt.info_interval * 1000.0,
                                Visdom.port)
                print(message)
                iter_start_time = tim
                loss_sum = 0
                data_load_total_time = 0

            debug.dbg_print("Iteration %d, loss %g" % (i, curr_loss))

            if need_preview:
                do_visualize(raw_data, output, pos_map, debug_data)

            if i % opt.test_interval==0:
                test()

            saver.tick(i)

            if opt.demo and opt.exit_after is None:
                running = False
                input("Press enter to quit.")

            if opt.exit_after is not None and (i-start_iter)>=opt.exit_after:
                running=False

            data_load_timer = time.time()