예제 #1
0
    def fit(self, train_x, train_y, evals, top):
        """Fit the user/item latent vectors with stochastic gradient descent.

        The ratings are first indexed into the user->item and item->user
        dictionaries and every item id is appended to ``self.items_pool``.
        Users and items that have no latent vector yet get a random one of
        length ``self.__f`` scaled by ``1 / sqrt(self.__f)``. Then
        ``self.__steps`` passes are made over all known ratings; after each
        pass the learning rate ``self.__lr`` is multiplied by 0.9 (the decay
        is stored on the instance), a progress line is printed and
        ``self.__loss()`` is called.

        :param train_x: 2-D array-like exposing ``shape``; column 0 of a row
            is used as the user id and column 1 as the item id.
        :param train_y: ratings, indexed in step with the rows of ``train_x``.
        :param evals: indexable whose elements 0 and 1 are forwarded to
            ``self.evals`` after every pass; evaluation is skipped when falsy.
        :param top: forwarded to ``self.evals``; evaluation is skipped when
            falsy.
        """
        m, _ = train_x.shape
        for row in range(m):
            uid, iid, rating = train_x[row][0], train_x[row][1], train_y[row]
            self.__user_item.setdefault(uid, {})[iid] = rating
            self.__item_user.setdefault(iid, {})[uid] = rating
            self.items_pool.append(iid)

        scale = np.sqrt(self.__f)
        for u in self.__user_item:
            self.p.setdefault(u, np_random(self.__f) / scale)
        for i in self.__item_user:
            self.q.setdefault(i, np_random(self.__f) / scale)

        for step in range(self.__steps):
            for u, ratings in self.__user_item.items():
                for i, r in ratings.items():
                    e = r - np.dot(self.p[u], self.q[i])
                    # Snapshot the item vector: ``+=`` below updates the
                    # array in place, so a plain reference would already hold
                    # the new values when the user vector is updated.
                    q_old = np.array(self.q[i])
                    self.q[i] += self.__lr * (e * self.p[u] - self.__lambda * q_old)
                    self.p[u] += self.__lr * (e * q_old - self.__lambda * self.p[u])
            self.__lr *= 0.9
            print("第%d次迭代完成!" % (step + 1))
            self.__loss()
            if evals and top:
                self.evals(evals[0], evals[1], top)
예제 #2
0
def main():
    """Benchmark: build a wide emzed table and write it to ``test.hdf5``.

    Loads the peak map ``141208_pos001.mzXML``, derives a second peak map
    from a deep copy with the first spectrum dropped, builds a 10000-row
    table with random ``mzmin``/``mzmax``/``rtmin``/``rtmax`` columns plus
    30 groups of float, string, tuple and peak-map columns, and finally
    hands the table to ``to_hdf5``. Loading, table creation and writing are
    each timed and reported on stdout.
    """
    import contextlib
    import copy
    import random
    import time

    @contextlib.contextmanager
    def measure(title=""):
        """Print a start line, run the ``with`` body, then print the elapsed seconds."""
        label = " " + title if title else title
        print("start%s" % label)
        t0 = time.time()
        yield
        elapsed = time.time() - t0
        print("running%s needed %.2f seconds" % (label, elapsed))

    import emzed

    with measure("load pm"):
        pm = emzed.io.loadPeakMap("141208_pos001.mzXML")

    # Second peak map: a deep copy that lacks the first spectrum.
    pm2 = copy.deepcopy(pm)
    pm2.spectra = pm2.spectra[1:]
    pms = [pm, pm2]

    n = 10000
    # Descending integers n-1 .. 0 with every tenth position set to None.
    integers = [None if pos % 10 == 0 else n - 1 - pos for pos in range(n)]

    from numpy.random import randint, random as np_random

    tuples = [tuple(randint(0, 1000, size=10)) for _ in range(100)]

    with measure("create table"):
        t = emzed.utils.toTable("integers", integers, type_=int)

        mzmin_values = t.apply(lambda: 100 + 900 * np_random() + np_random(), ())
        t.addColumn("mzmin", mzmin_values, type_=float)
        mzmax_values = t.apply(lambda mzmin: mzmin + 0.1 * np_random(), (t.mzmin,))
        t.addColumn("mzmax", mzmax_values, type_=float)

        rtmin_values = t.apply(lambda: 50 + 1000 * np_random(), ())
        t.addColumn("rtmin", rtmin_values, type_=float)
        rtmax_values = t.apply(lambda rtmin: rtmin + 10 + 60 * np_random(), (t.rtmin,))
        t.addColumn("rtmax", rtmax_values, type_=float)

        t.addColumn("peakmap", t.apply(lambda: random.choice(pms), ()), type_=object)

        for idx in range(30):
            t.addColumn("floats_%d" % idx, t.integers + 1.1, type_=float)
            t.addColumn("strings_%d" % idx, t.integers.apply(str) * (idx % 3), type_=str)
            t.addColumn("tuples_%d" % idx, t.apply(lambda: random.choice(tuples), ()), type_=object)
            t.addColumn("peakmaps_%d" % idx, pms[idx % 2], type_=object)

    with measure("write hdf5 table with %d rows and %d cols" % t.shape):
        to_hdf5(t, "test.hdf5")
예제 #3
0
def main():
    """Benchmark: build a wide emzed table and write it with ``atomic_hdf5_writer``.

    Loads the peak map ``Danu.mzML``, derives a second peak map from a deep
    copy with the first spectrum dropped, and builds a 10000-row table with
    a check-state column, a sub-table column, random ``mzmin``/``mzmax``/
    ``rtmin``/``rtmax`` columns, ten groups of float, string, tuple and
    peak-map columns and a sparse ``target_id`` column. The table is then
    added ``fac`` times to ``test_<rows>.hdf5``. Loading, table creation and
    writing are each wrapped in ``measure``.
    """
    with measure("load pm"):
        pm = emzed.io.loadPeakMap("Danu.mzML")

    # Second peak map: a deep copy that lacks the first spectrum.
    pm2 = copy.deepcopy(pm)
    pm2.spectra = pm2.spectra[1:]
    pms = [pm, pm2]

    rt_lo, rt_hi = pm.rtRange()

    n = 10000
    # Descending integers n-1 .. 0 with every tenth position set to None.
    integers = [None if pos % 10 == 0 else n - 1 - pos for pos in range(n)]
    flags = [pos % 2 == 0 for pos in range(n)]
    tuples = [tuple(randint(0, 1000, size=10)) for _ in range(100)]
    tsub = emzed.utils.toTable("a", (1, 2, 3), type_=int)

    with measure("create table"):
        t = emzed.utils.toTable("integers", integers, type_=int)
        t.addColumn("check", flags, type_=CheckState)
        t.addColumn("sub_table", tsub, type_=Table, format_="%r")

        mzmin_values = t.apply(lambda: 100 + 900 * np_random() + np_random(), ())
        t.addColumn("mzmin", mzmin_values, type_=float)
        mzmax_values = t.apply(lambda mzmin: mzmin + 0.1 * np_random(), (t.mzmin, ))
        t.addColumn("mzmax", mzmax_values, type_=float)

        # rtmin is drawn between the bounds returned by pm.rtRange().
        rtmin_values = t.apply(lambda: rt_lo + (rt_hi - rt_lo) * np_random(), ())
        t.addColumn("rtmin", rtmin_values, type_=float)
        rtmax_values = t.apply(lambda rtmin: rtmin + 30 + 300 * np_random(), (t.rtmin, ))
        t.addColumn("rtmax", rtmax_values, type_=float)

        t.addColumn("peakmap", t.apply(lambda: random.choice(pms), ()), type_=object)

        for idx in range(10):
            print(idx)
            t.addColumn("floats_%d" % idx, t.integers + 1.1, type_=float)
            t.addColumn("strings_%d" % idx, t.integers.apply(str) * (idx % 3), type_=str)
            t.addColumn("tuples_%d" % idx, t.apply(lambda: random.choice(tuples), ()), type_=object)
            t.addColumn("peakmaps_%d" % idx, pms[idx % 2], type_=object)

        # The first four rows of every block of 100 share one target id;
        # all remaining rows stay None.
        target_ids = [None] * n
        for start in range(0, n, 100):
            target_ids[start:start + 4] = ["target_%d" % start] * 4

        t.addColumn("target_id", target_ids, type_=str)

    n_rows, n_cols = t.shape
    # Only factor 1 is written; extend the tuple to write larger files.
    for fac in (1, ):
        total_rows = n_rows * fac
        with measure("write hdf5 table with %d rows and %d cols" % (total_rows, n_cols)):
            path = "test_%d.hdf5" % total_rows
            with atomic_hdf5_writer(path) as add:
                for chunk in range(fac):
                    print(chunk, "out of", fac)
                    add(t)
예제 #4
0
    def bayesian_pattern_based(self, y, r_matrix, init_rules):
        """Search for a high-scoring rule set with an annealed accept/reject loop.

        Starting from ``init_rules``, each of the ``self.max_iter`` iterations
        asks ``self.propose`` for a modified rule set and scores it with
        ``self.compute_prob`` (the score is the sum of the returned ``prob``).
        The temperature is ``T0 ** (1 - ith_iter / max_iter)`` with
        ``T0 = 1000``. A proposal becomes the new current state when a fresh
        ``np_random()`` draw is ``<= exp((pt_new - pt_curr) / T)``; the
        proposal of iteration 0 is always accepted.

        Whenever a proposal scores higher than the last entry of
        ``self.maps[0]``, a new entry is appended there, ``self.Asize``,
        ``self.const_denominator`` and the support threshold list ``self.C``
        are updated, and ``self.predicted_rules`` is set to the proposal.

        :param y: labels, forwarded to ``self.propose`` and
            ``self.compute_prob``.
        :param r_matrix: rule satisfaction matrix, forwarded to
            ``self.propose`` and ``self.compute_prob``.
        :param init_rules: sliceable sequence of indexes into ``self.rules``
            used as the starting rule set; it is not modified.
        :return: ``self.maps[0]``, a list of
            ``[iteration, prob, rule indexes, rules, cfmatrix]`` entries whose
            first element is the placeholder for ``init_rules``
            (iteration ``-1``).
        """
        # |A| : min((rule_space)/2,(rule_space+beta_l-alpha_l)/2)
        self.Asize = [[
            min(
                self.pattern_space[l] / 2, 0.5 *
                (self.pattern_space[l] + self.beta_l[l] - self.alpha_l[l]))
            for l in range(self.maxlen + 1)
        ]]
        # support threshold
        self.C = [1]

        self.maps = defaultdict(list)
        T0 = 1000

        rules_curr = init_rules
        pt_curr = -1000000000
        # now only consider 1 chain
        # it should have been maps[chain]
        self.maps[0].append([
            -1, [pt_curr / 3, pt_curr / 3, pt_curr / 3], rules_curr,
            [self.rules[i] for i in rules_curr], []
        ])
        alpha = np.inf
        for ith_iter in range(self.max_iter):
            # Hand over a copy: propose() edits the list it receives, and a
            # rejected proposal must leave the current state (and the
            # caller's init_rules) unchanged.
            rules_new = self.propose(rules_curr[:], y, r_matrix)
            cfmatrix, prob = self.compute_prob(r_matrix, y, rules_new)
            T = T0**(1 - ith_iter / self.max_iter)
            pt_new = sum(prob)
            if ith_iter > 0:
                # The original Wang et al. code did not have this check
                # and was resulting in RuntimeWarning because we were
                # passing np.exp() a very large number (-pt_curr = 1000000000 in 0-th iter).
                # We do not expect the algorithm performance to change with this check
                # and we can avoid the RuntimeWarning
                alpha = np.exp(float(pt_new - pt_curr) / T)
            if pt_new > sum(self.maps[0][-1][1]):
                # New best score so far. The statement order matters here:
                # Asize uses the previous const_denominator, which is then
                # recomputed from the new Asize.
                self.Asize.append([
                    np.floor(
                        min(self.Asize[-1][l], (-pt_new + self.Lup + self.P0) /
                            max(1., self.const_denominator[l])))
                    for l in range(self.maxlen + 1)
                ])
                self.const_denominator = [
                    np.log(
                        np.true_divide(
                            max(1.,
                                self.pattern_space[l] + self.beta_l[l] - 1),
                            max(1., self.Asize[-1][l] + self.alpha_l[l] - 1)))
                    for l in range(self.maxlen + 1)
                ]
                self.maps[0].append([
                    ith_iter, prob, rules_new,
                    [self.rules[i] for i in rules_new], cfmatrix
                ])
                new_supp = np.ceil(
                    np.log(
                        max([
                            np.true_divide(
                                self.pattern_space[l] - self.Asize[-1][l] +
                                self.beta_l[l],
                                max(1.,
                                    self.Asize[-1][l] - 1 + self.alpha_l[l]))
                            for l in range(1, self.maxlen + 1, 1)
                        ])))
                self.C.append(new_supp)
                self.predicted_rules = rules_new
            if np_random() <= alpha:
                rules_curr, pt_curr = rules_new[:], pt_new

        return self.maps[0]
예제 #5
0
    def propose(self, rules_curr, y, r_matrix):
        """ Propose a modification to the current set of rules

        A misclassified instance is picked at random; depending on its label,
        the number of current rules and a random draw, the proposal cuts a
        rule, adds a rule, or does both. If no instance is misclassified the
        rule set is returned unchanged.

        The input is never modified: all edits are made on a slice copy,
        which is what gets returned.

        :param rules_curr: list of int
            indexes of rules currently in play
        :param y: np.array
            labels; compared element-wise against the current predictions
        :param r_matrix: np.ndarray
            satisfaction matrix for all the rules in play, indexed as
            ``r_matrix[instance, rule]``
        :return: list of int
            proposed set of rules (a new list)
        """
        # Work on a copy so the caller's current state survives a proposal
        # that is later rejected.
        rules_curr = rules_curr[:]

        # ex is an instance selected at random
        ex = None

        yhat = self.check_satisfies_at_least_one_rule(r_matrix, rules_curr)
        incorr = np.where(y != yhat)[0]
        rules_curr_len = len(rules_curr)

        move = ['clean']
        if len(incorr) > 0:
            ex = sample(list(incorr), 1)[0]
            t = np_random()
            if y[ex] == 1 or rules_curr_len == 1:
                if t < 1.0 / 2 or rules_curr_len == 1:
                    move = ['add']
                else:
                    move = ['cut', 'add']
            else:
                if t < 1.0 / 2:
                    move = ['cut']
                else:
                    move = ['cut', 'add']
        # logger.debug("move: %s" % str(move))

        # 'cut' a rule
        if move[0] == 'cut':
            try:
                if np_random() < self.propose_threshold:
                    # Prefer cutting a rule that the chosen instance satisfies.
                    candidate = [
                        rule for rule in rules_curr if r_matrix[ex, rule]
                    ]
                    if len(candidate) == 0:
                        candidate = rules_curr
                    cut_rule = sample(candidate, 1)[0]
                else:
                    # Score each rule by the precision-like ratio obtained
                    # when it is left out, then draw proportionally to
                    # exp(score - min score).
                    all_sum = np.zeros(r_matrix.shape[0], dtype=int)
                    for rule in rules_curr:
                        all_sum = all_sum + r_matrix[:, rule]

                    p = []
                    for rule in rules_curr:
                        yhat = (all_sum - r_matrix[:, rule]) > 0
                        TP, FP, TN, FN = get_confusion(yhat, y)
                        p.append(TP.astype(float) / (TP + FP + 1))
                    p_min = min(p)
                    p = np.exp([x - p_min for x in p])
                    p = np.insert(p, 0, 0)
                    p = np.cumsum(p)
                    if p[-1] == 0:
                        index = sample(list(range(len(rules_curr))), 1)[0]
                    else:
                        p = p / p[-1]
                        index = find_lt(p, np_random())
                    cut_rule = rules_curr[index]
                rules_curr.remove(cut_rule)
            except Exception:
                # Best effort: if no rule can be chosen or removed, skip the
                # cut and keep the current rules.
                pass
            move.remove('cut')

        # 'add' a rule
        if len(move) > 0 and move[0] == 'add':
            support_ok = self.supp > self.C[-1]
            # Parentheses spell out the precedence already in effect
            # (``&`` binds tighter than ``>``).
            if y[ex] == 1:
                select = np.where((support_ok & ~r_matrix[ex]) > 0)[0]
            else:
                select = np.where((support_ok & r_matrix[ex]) > 0)[0]
            if len(select) > 0:
                if np_random() < self.propose_threshold:
                    add_rule = sample(select.tolist(), 1)[0]
                else:
                    Yhat_neg_index = np.where(
                        ~self.check_satisfies_at_least_one_rule(
                            r_matrix, rules_curr))[0]
                    # In case Yhat_neg_index is []
                    if Yhat_neg_index.shape[0] == 0:
                        return rules_curr
                    covered = r_matrix[Yhat_neg_index.reshape(-1, 1), select]
                    mat = covered.transpose() & y[Yhat_neg_index].astype(int)
                    TP = np.sum(mat, axis=1)
                    FP = np.array(np.sum(covered, axis=0) - TP)
                    p = (TP.astype(float) / (TP + FP + 1))
                    # Pick at random among the candidates with the best ratio.
                    add_rule = select[sample(list(np.where(p == max(p))[0]),
                                             1)[0]]
                try:
                    if add_rule not in rules_curr:
                        rules_curr.append(add_rule)
                except Exception:
                    # Best effort: a container that cannot be appended to is
                    # returned as is.
                    pass

        return rules_curr
예제 #6
0
 def decide_to_connect(self):
     """Return whether a fresh ``np_random()`` draw falls strictly below ``self.prob``."""
     draw = np_random()
     return draw < self.prob