Example #1
def __init__(self, path):
    self.CD = CD(path)
    self.DD = DD(path)
    self.DO = DO(path)
    self.GD = GD(path)
    self.GO = GO(path)
    self.ID = ID(path)
    self.OO = OO(path)
Example #2
import torch

def defensegan_gd(x, params, netG):
    # Sample r random starting points in the generator's latent space.
    z_array = []
    for i in range(params['r']):
        z_array.append(
            torch.FloatTensor(params['nz'], 1, 1).normal_(0, 1).numpy())

    result = None
    total_time = 0

    # Run L gradient-descent steps over all latent vectors, keeping the
    # reconstruction produced by the final step.
    for i in range(params['L']):
        z_array, result, step_time = GD(x, params, netG, z_array)
        total_time += step_time

    return result, total_time
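
The GD helper called above is not shown. Below is a minimal sketch of what each step plausibly does in the Defense-GAN scheme: one gradient update per latent vector toward minimizing ||netG(z) - x||^2, returning the updated latents, a reconstruction, and the elapsed time. The signature mirrors the caller; the body and the 'lr' key are assumptions, not the original implementation.

import time

import torch
import torch.nn.functional as F

def GD(x, params, netG, z_array):
    # One descent step on each latent vector z, minimizing ||netG(z) - x||^2.
    start = time.time()
    best_recon, best_loss = None, float('inf')
    new_z_array = []
    for z_np in z_array:
        z = torch.from_numpy(z_np).unsqueeze(0).requires_grad_(True)
        recon = netG(z)
        loss = F.mse_loss(recon, x)
        loss.backward()
        with torch.no_grad():
            z -= params.get('lr', 0.1) * z.grad  # 'lr' key is an assumption
        new_z_array.append(z.detach().squeeze(0).numpy())
        if loss.item() < best_loss:
            best_loss, best_recon = loss.item(), recon.detach()
    return new_z_array, best_recon, time.time() - start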
Example #3
class CodeBook:

    def __init__(self, path):
        self.CD = CD(path)
        self.DD = DD(path)
        self.DO = DO(path)
        self.GD = GD(path)
        self.GO = GO(path)
        self.ID = ID(path)
        self.OO = OO(path)

    def decode(self, code, encoding, year):

        if encoding == 'CD':
            return self.CD.decode(code, year)
        elif encoding == 'DD':
            return self.DD.decode(code, year)
        elif encoding == 'DO':
            return self.DO.decode(code, year)
        elif encoding == 'GD':
            return self.GD.decode(code, year)
        elif encoding == 'GO':
            return self.GO.decode(code, year)
        elif encoding == 'ID':
            return self.ID.decode(code, year)
        elif encoding == 'OO':
            return self.OO.decode(code, year)
        else:
            print('Error: encoding not found')
            exit(-1)

    def decode_file(self, filename, encoding, year):
        # Decode one code per line of the file, stripping the trailing newline.
        data = []
        with open(filename) as f:
            for line in f:
                data.append(self.decode(line.rstrip('\n'), encoding, year))
        return data
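
The per-encoding decoder classes (CD, DD, DO, GD, GO, ID, OO) are defined elsewhere, so the following usage is hypothetical; the path, code value, and year are placeholders.

book = CodeBook('codebooks/')
label = book.decode('1234', 'CD', 2015)             # decode a single code
labels = book.decode_file('codes.txt', 'CD', 2015)  # decode one code per line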
Example #4
def perform(f, init):

    # 1. Vanilla Gradient Descent
    alg_GD = GD(f)
    min_GD, t_GD = alg_GD.perform(init, a=0.01)
    if t_GD == 0:  # a zero step count appears to signal non-convergence
        t_GD = 10000
    #write_matrix(min_GD, 'GD')
    min_GD = min_GD.tolist()
    min_GD = {'x': min_GD[0], 'y': min_GD[1], 't': t_GD}

    # 2. Gradient Descent with Momentum
    alg_GDm = GD_m(f)
    min_GDm, t_GDm, mu_GDm = alg_GDm.perform(init, validation=False, a=0.01)
    if t_GDm == 0:
        t_GDm = 10000
    #write_matrix(min_GDm, 'GDm')
    min_GDm = min_GDm.tolist()
    min_GDm = {'x': min_GDm[0], 'y': min_GDm[1], 't': t_GDm}

    # 3. AdaGrad
    alg_AdaGrad = AdaGrad(f)
    min_AdaGrad, t_AdaGrad = alg_AdaGrad.perform(init, a=0.1)
    if t_AdaGrad == 0:
        t_AdaGrad = 10000
    #write_matrix(min_AdaGrad, 'AdaGrad')
    min_AdaGrad = min_AdaGrad.tolist()
    min_AdaGrad = {'x': min_AdaGrad[0], 'y': min_AdaGrad[1], 't': t_AdaGrad}

    # 4. RMSProp
    alg_RMSProp = RMSProp(f)
    min_RMSProp, t_RMSProp, param = alg_RMSProp.perform(init,
                                                        validation=False,
                                                        a=0.01)
    if t_RMSProp == 0:
        t_RMSProp = 10000
    #write_matrix(min_RMSProp, 'RMSProp')
    min_RMSProp = min_RMSProp.tolist()
    min_RMSProp = {'x': min_RMSProp[0], 'y': min_RMSProp[1], 't': t_RMSProp}

    # 5. Adam
    alg_Adam = Adam(f)
    min_Adam, t_Adam, a, b = alg_Adam.perform(init, validation=False, a=0.05)
    if t_Adam == 0:
        t_Adam = 10000
    #write_matrix(min_Adam, 'Adam')
    min_Adam = min_Adam.tolist()
    min_Adam = {'x': min_Adam[0], 'y': min_Adam[1], 't': t_Adam}

    print(t_GD, t_GDm, t_AdaGrad, t_RMSProp, t_Adam)

    return {
        'GD': {
            'steps': min_GD,
            'time': t_GD
        },
        'GDm': {
            'steps': min_GDm,
            'time': t_GDm
        },
        'AdaGrad': {
            'steps': min_AdaGrad,
            'time': t_AdaGrad
        },
        'RMSProp': {
            'steps': min_RMSProp,
            'time': t_RMSProp
        },
        'Adam': {
            'steps': min_Adam,
            'time': t_Adam
        }
    }
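
The optimizer classes compared here (GD, GD_m, AdaGrad, RMSProp, Adam) come from elsewhere. Below is a minimal sketch of the vanilla GD class under the interface this harness seems to assume: f exposes a grad method, and perform returns a 2xT array of visited points plus a step count, with 0 signalling that the step budget ran out. All of these interface details are assumptions inferred from the caller.

import numpy as np

class GD:
    def __init__(self, f):
        self.f = f  # assumed: f.grad(p) returns the gradient at point p

    def perform(self, init, a=0.01, tol=1e-6, max_steps=10000):
        p = np.asarray(init, dtype=float)
        path = [p.copy()]
        for t in range(1, max_steps + 1):
            g = self.f.grad(p)
            p = p - a * g
            path.append(p.copy())
            if np.linalg.norm(g) < tol:
                return np.array(path).T, t
        return np.array(path).T, 0  # 0 signals the budget was exhausted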
Example #5
File: main.py Project: akornmann/VF
from numpy import zeros

# Galerkin parameters
N = 20
ordre = 4
a = 0.
b = 2.

# RK2 parameters
dt = ((b - a) / N) / (2 * (ordre + 1))
Tmax = 20.

# animation parameters
ymin = -1.0
ymax = 1.0

# initialize Galerkin
galerkin = GD(ordre, a, b, N)
# 3D array containing the solution
W = zeros((2, N, ordre))

# initialize animation
x = galerkin.getX()
anim = Animation(x, ymin, ymax)

# initialize RK2
rk2 = RK2(dt, galerkin.dW)

# time stepping
while (rk2.t < Tmax):

    def lim0():
        we = zeros((2, 1))
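
The RK2 class constructed above (and the rest of the time loop, which is cut off in this excerpt) is not shown. Here is a minimal midpoint-rule sketch under the interface implied by the call RK2(dt, galerkin.dW) and the rk2.t clock; the step method and its use are assumptions.

class RK2:
    # Midpoint-rule Runge-Kutta 2 stepper for dW/dt = f(W).
    def __init__(self, dt, f):
        self.dt = dt
        self.f = f
        self.t = 0.0

    def step(self, W):
        k1 = self.f(W)
        k2 = self.f(W + 0.5 * self.dt * k1)
        self.t += self.dt
        return W + self.dt * k2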
Example #6
def gd_inhouse(epoch, learning_rate, X, y, X_test):
    from GD import GD

    clf = GD(nepoch=epoch, learning_rate=learning_rate)
    clf.fit(X, y)
    return clf.predict(X_test)
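
A hypothetical call with synthetic data, assuming the in-house GD module is importable; the shapes and labels below are placeholders.

import numpy as np

X = np.random.randn(100, 3)
y = (X[:, 0] > 0).astype(int)
X_test = np.random.randn(10, 3)
preds = gd_inhouse(epoch=100, learning_rate=0.01, X=X, y=y, X_test=X_test)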
Example #7
def execute_with_input(check_next=25, batch_size=25):

    serialID_to_entityID = get_serialID_to_entityID()
    (main_data_df, explanations, data_id, data_x, data_label,
     data_x_features, data_ID_to_matrix) = get_data()
    emb_dim = record_class.embedding['HSCode'].shape[1]

    # -------------------------------------------
    domain_idx = {e[0]: e[1] for e in enumerate(domain_dims.keys())}
    domainInteraction_index = {}
    num_domains = len(domain_dims)
    k = 0
    for i in range(num_domains):
        for j in range(i + 1, num_domains):
            domainInteraction_index['_'.join(
                (domain_idx[i], domain_idx[j]))] = k
            k += 1

    idx = np.arange(len(data_id), dtype=int)
    np.random.shuffle(idx)

    data_x = data_x[idx]
    data_label = data_label[idx]
    data_id = data_id[idx]

    X_0 = np.array(data_x_features)[idx.tolist()]  # Relevant anomalies
    X_1 = obtain_normal_samples()  # Nominal

    X_1 = np.array(X_1)

    y_0 = np.ones(X_0.shape[0])
    y_1 = -1 * np.ones(X_1.shape[0])
    y = np.hstack([y_0, y_1])
    X = np.vstack([X_0, X_1])
    num_coeff = len(domainInteraction_index)

    classifier_obj = get_trained_classifier(X, y, num_domains, emb_dim)

    W = classifier_obj.W.cpu().data.numpy()
    emb_dim = W.shape[-1]

    # classifier_obj.predict_score_op(X_0)
    # Create a reference dataframe  :: data_reference_df
    data_reference_df = pd.DataFrame(data=np.vstack([data_id,
                                                     data_label]).transpose(),
                                     columns=['PanjivaRecordID', 'label'])

    data_reference_df['baseID'] = data_reference_df['PanjivaRecordID'].apply(
        lambda x: str(x)[:-3])
    data_reference_df['expl_1'] = -1
    data_reference_df['expl_2'] = -1
    data_reference_df['original_score'] = 1

    for i, row in data_reference_df.iterrows():
        _id = int(row['PanjivaRecordID'])
        if _id in explanations.keys():
            entry = explanations[_id]
            domain_1 = entry[0][0]
            domain_2 = entry[0][1]
            data_reference_df.loc[i,
                                  'expl_1'] = domainInteraction_index['_'.join(
                                      sorted([domain_1, domain_2]))]
            domain_1 = entry[1][0]
            domain_2 = entry[1][1]
            data_reference_df.loc[i,
                                  'expl_2'] = domainInteraction_index['_'.join(
                                      sorted([domain_1, domain_2]))]
        _x = data_ID_to_matrix[_id]
        data_reference_df.loc[
            i, 'original_score'] = classifier_obj.predict_score_op(
                np.array([_x]))[0]

    data_reference_df['cur_score'] = data_reference_df['original_score'].values

    # To get random results
    # Randomization
    cur_df = data_reference_df.copy()
    cur_df = cur_df.sample(frac=1).reset_index(drop=True)
    cur_df = shuffle(cur_df).reset_index(drop=True)

    clf_obj = copy.deepcopy(classifier_obj)

    working_df = cur_df.copy(deep=True)
    ref_data_df = main_data_df.copy(deep=True)

    precision = []
    recall = []
    domain_list = list(domain_dims.keys())
    total_posCount = len(working_df.loc[working_df['label'] == 1])

    # -------------------------------------------------
    #  Main loop
    # -------------------------------------------------
    next_K_precision = []
    prev_discovered_count = 0
    BATCH_SIZE = batch_size
    ID_COL = 'PanjivaRecordID'
    discovered_df = pd.DataFrame(columns=list(working_df.columns))

    W = clf_obj.W.cpu().data.numpy()
    GD_obj = GD(num_coeff, emb_dim, interaction_type=interaction_type)
    GD_obj.set_original_W(W)
    num_batches = len(data_reference_df) // batch_size
    zero_count = 0
    for batch_idx in tqdm(range(num_batches)):
        print('Batch : {}'.format(batch_idx + 1))
        if batch_idx == 0:
            lr = 0.25
            max_iter = 1000
        else:
            lr = 1
            max_iter = 500

        cur = working_df.head(BATCH_SIZE).reset_index(drop=True)
        if len(cur) < 2:
            break

        _tail_count = len(working_df) - BATCH_SIZE
        tmp = working_df.tail(_tail_count).reset_index(drop=True)
        if len(tmp.loc[tmp['label'] == 1]) == 0:
            zero_count += 1
            if zero_count > 5:
                next_K_precision.append(0)
                working_df = working_df.tail(_tail_count).reset_index(
                    drop=True)
                continue
        else:
            zero_count = 0
        # -----
    # Count of discovered anomalies in the current batch (the top rows, sized by BATCH_SIZE)
        # -----
        cum_cur_discovered = prev_discovered_count + len(
            cur.loc[cur['label'] == 1])
        prev_discovered_count = cum_cur_discovered
        _recall = float(cum_cur_discovered) / total_posCount
        recall.append(_recall)

        x_ij = []
        x_entityIds = []

        flags = []  # whether the row is a positive (labelled) anomaly
        terms = []  # explanation terms
        discovered_df = pd.concat([discovered_df, cur], ignore_index=True)

        for i, row in discovered_df.iterrows():
            _mask = np.zeros(len(domainInteraction_index))
            if row['label'] == 1:
                _mask[row['expl_1']] = 1
                _mask[row['expl_2']] = 1
                flags.append(1)
                terms.append((
                    row['expl_1'],
                    row['expl_2'],
                ))
            else:
                flags.append(0)
                terms.append(())
            id_value = row['PanjivaRecordID']
            x_ij.append(data_ID_to_matrix[id_value])

            row_dict = ref_data_df.loc[(
                ref_data_df[ID_COL] == id_value)].iloc[0].to_dict()
            x_entityIds.append([row_dict[d] for d in domain_list])

        x_entityIds = np.array(x_entityIds)
        x_ij = np.array(x_ij)

        updated_W = GD_obj.update_weight(flags,
                                         terms,
                                         x_ij,
                                         lr=lr,
                                         max_iter=max_iter)

        # ----------------------------------------------------
        # Update Model
        # ----------------------------------------------------
        clf_obj.update_W(updated_W)
        clf_obj.update_binary_VarW(x_entityIds, flags)

        _tail_count = len(working_df) - BATCH_SIZE
        working_df = working_df.tail(_tail_count).reset_index(drop=True)

        # Obtain scores
        x_ij_test = []
        x_entityIds = fetch_entityID_arr_byList(
            ref_data_df, working_df['PanjivaRecordID'].values.tolist())
        for _id in working_df['PanjivaRecordID'].values:
            x_ij_test.append(data_ID_to_matrix[_id])

        x_ij_test = np.array(x_ij_test)

        new_scores = clf_obj.predict_bEF(x_entityIds, x_ij_test)

        old_scores = working_df['cur_score'].values
        _delta = new_scores - old_scores
        working_df['delta'] = new_scores  # note: ranking uses the new scores, not _delta
        working_df = working_df.sort_values(by='delta', ascending=False)
        working_df = working_df.reset_index(drop=True)

        tmp = working_df.head(check_next)
        _labels = tmp['label'].values
        res = len(np.where(_labels == 1)[0])
        _precision = res / check_next
        next_K_precision.append(_precision)

    return next_K_precision
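
execute_with_input relies on module-level globals (domain_dims, record_class, interaction_type) and helpers (get_data, obtain_normal_samples, get_trained_classifier, fetch_entityID_arr_byList) defined elsewhere. Assuming those are initialized, a hypothetical invocation looks like this:

import numpy as np

precision_at_k = execute_with_input(check_next=25, batch_size=25)
print('mean precision@25:', np.mean(precision_at_k))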
Example #8
import scipy.io
import numpy as np
import matplotlib.pyplot as plt

data = scipy.io.loadmat('breast-cancer.mat')

b = data['labels_train']
# Prepend a column of ones to the features to model the bias term.
A = np.concatenate((np.ones([data['features_train'].shape[0], 1]),
                    data['features_train'].toarray()), axis=1)

# First-order oracle
# An example of a Python lambda function is below.
# This function computes the sigmoid of each entry of its input x.
sigmoid = lambda x: (1. / (1. + np.exp(-x)))
# fx computes the logistic-regression objective at x
fx     = lambda x: (-np.sum(np.log(sigmoid(b * (A.dot(x)))), axis=0))
# gradf computes the gradient of the objective at x
gradf  = lambda x: (-A.T.dot((1. - sigmoid(b * (A.dot(x)))) * b))

# parameters
maxit             = ...?
stepsize          = ...?
x0                = ...?

# gradient descent
xGD, obj          = GD(fx, gradf, stepsize, x0, maxit)

# plot the convergence
plt.figure()
plt.loglog((obj - data['optval'])/data['optval'])
plt.ylabel('(f(x)-f^*)/f^*')
plt.xlabel('iterations')
axes = plt.gca()

plt.show()
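
The GD routine called above is left for the reader to supply, along with the maxit, stepsize, and x0 placeholders. A minimal sketch of plain gradient descent matching the call GD(fx, gradf, stepsize, x0, maxit) follows; returning the final iterate together with the per-iteration objective values is an assumption inferred from the plotting code.

def GD(fx, gradf, stepsize, x0, maxit):
    # Plain gradient descent: x_{k+1} = x_k - stepsize * gradf(x_k),
    # recording the objective at every iterate for the convergence plot.
    x = x0.copy()
    obj = np.zeros(maxit)
    for k in range(maxit):
        obj[k] = fx(x)
        x = x - stepsize * gradf(x)
    return x, obj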