Code example #1
    def __error(self, R, P, Q, K, beta):
        """
        Calculates the error for the function
        :param R:
        :param P:
        :param Q:
        :param K:
        :param beta:
        :return:
        """
        e = 0
        for i in xrange(len(R)):
            for j in xrange(len(R[i])):
                if R[i][j] > 0:

                    # loss function error sum( (y-y_hat)^2 )
                    e = e + pow(R[i][j] - numpy.dot(P[i, :], Q[:, j]), 2)

                    # add regularization
                    for k in xrange(K):

                        # error + ||P||^2 + ||Q||^2
                        e = e + (beta / 2) * (pow(P[i][k], 2) +
                                              pow(Q[k][j], 2))
        return e
Code example #2
def generate_table_from_xlsx(path):
    book = xlrd.open_workbook(path)
    sheet = book.sheet_by_index(1)
    data = {}
    for row_index in xrange(0, sheet.nrows):
        row_values = [sheet.cell(row_index, col_index).value for col_index in xrange(0, sheet.ncols)]
        if row_index == 2:
            time_w = list(filter(None, row_values))
            print(time_w)

        if row_index == 3:
            idx_w = [index for index, value in enumerate(row_values) if value == "WEIGHT"]
            idx_c = [index for index, value in enumerate(row_values) if value == "FAMACHA"]

        chunks = []
        if row_index > 4:
            for i in xrange(0, len(idx_w)):
                if row_values[1] == '':
                    continue
                s = "40101310%s" % row_values[1]
                serial = int(s.split('.')[0])
                chunks.append([time_w[i], row_values[idx_c[i]], serial])
            if len(chunks) != 0:
                data[serial] = chunks
    print(data)
    return data
Code example #3
File: bleu_scorer.py  Project: Jackcong1/ECMRC
def precook(s, n=4, out=False):
    """Takes a string as input and returns an object that can be given to
    either cook_refs or cook_test. This is optional: cook_refs and cook_test
    can take string arguments as well."""
    words = s.split()

    counts = defaultdict(int)
    for k in xrange(1, n + 1):
        for i in xrange(len(words) - k + 1):
            ngram = tuple(words[i:i + k])
            counts[ngram] += 1
    return (len(words), counts)
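
A quick usage sketch (illustrative input; precook's module imports defaultdict from collections). It returns the token count and a mapping from n-gram tuples to their frequencies.

length, counts = precook("the cat sat on the mat", n=2)
print(length)                  # 6
print(counts[("the",)])        # 2
print(counts[("the", "cat")])  # 1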
Code example #4
 def averagePixels(self):
     r, g, b = 0, 0, 0
     count = 0
     for x in xrange(self.pic.size[0]):
         for y in xrange(self.pic.size[1]):
             tempr, tempg, tempb = self.imgData[x, y]
             r += tempr
             g += tempg
             b += tempb
             count += 1
     # calculate averages
     return (r / count), (g / count), (b / count)
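
For context, a minimal sketch of how the attributes this method relies on could be set up with Pillow; the class name and wiring below are assumptions, not part of the source.

from PIL import Image

class PictureStats:
    def __init__(self, path):
        self.pic = Image.open(path).convert("RGB")  # self.pic.size -> (width, height)
        self.imgData = self.pic.load()              # self.imgData[x, y] -> (r, g, b)
    # averagePixels (above) would then be a method of this class and return
    # the mean (r, g, b) over all pixels.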
Code example #5
File: grad_cam.py  Project: dan393/Dissertation
def shrink_shap(data, rows, cols):
    shrunk = np.zeros((rows,cols))
    for i in range(0, rows):
        for j in range(0, cols):
            row_sp = int (data.shape[0]/rows)
            col_sp = int (data.shape[1]/cols)
            zz = data[i*row_sp : i*row_sp + row_sp, j*col_sp : j*col_sp + col_sp]
            shrunk[i,j] = np.sum(zz)
    # flatten the normalised map; values is already 1-D after this step
    values = np.array([item for sublist in shrunk / np.max(shrunk) for item in sublist])
    shap_values = []
    for i in range(5):
        shap_values.append([values.tolist()])
    shap_values = np.array(shap_values)
    shap_values = shap_values / np.max(shap_values)
    return shap_values
Code example #6
def analyze_centrality(graph):
    centrality_dict = OrderedDict()

    print('Analyzing degree centrality...')
    score_list = graph.degree()
    centrality_dict['degree'] = sorted([(graph.vs[i], score_list[i])
                                        for i in xrange(0, len(score_list))],
                                       key=lambda x: x[1],
                                       reverse=True)
    print('Done!')
    print()

    print('Analyzing betweenness centrality...')
    score_list = graph.betweenness()
    centrality_dict['betweenness'] = sorted(
        [(graph.vs[i], score_list[i]) for i in xrange(0, len(score_list))],
        key=lambda x: x[1],
        reverse=True)
    print('Done!')
    print()

    print('Analyzing closeness centrality...')
    score_list = graph.closeness()
    centrality_dict['closeness'] = sorted(
        [(graph.vs[i], score_list[i]) for i in xrange(0, len(score_list))],
        key=lambda x: x[1],
        reverse=True)
    print('Done!')
    print()

    print('Analyzing eigenvector centrality...')
    score_list = graph.evcent()
    centrality_dict['eigenvector'] = sorted(
        [(graph.vs[i], score_list[i]) for i in xrange(0, len(score_list))],
        key=lambda x: x[1],
        reverse=True)
    print('Done!')
    print()

    print('Analyzing pagerank centrality...')
    score_list = graph.pagerank()
    centrality_dict['pagerank'] = sorted([(graph.vs[i], score_list[i])
                                          for i in xrange(0, len(score_list))],
                                         key=lambda x: x[1],
                                         reverse=True)
    print("Done!")

    print()

    return centrality_dict
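
A short usage sketch, assuming graph is a python-igraph Graph and that the module above imports OrderedDict (the example graph is illustrative):

from igraph import Graph

g = Graph.Famous("Zachary")                # Zachary's karate club graph, bundled with igraph
centrality = analyze_centrality(g)
top_pagerank = centrality['pagerank'][:3]  # three highest-scoring (vertex, score) pairs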
Code example #7
File: bleu_scorer.py  Project: Jackcong1/ECMRC
def cook_test(test, reflen_refmaxcounts, eff=None, n=4):
    '''Takes a test sentence and returns an object that
    encapsulates everything that BLEU needs to know about it.'''

    testlen, counts = precook(test, n, True)

    result = {}

    # Calculate effective reference sentence length.

    if eff == "closest":
        result["reflen"] = min(
            (abs(l - testlen), l) for l in reflen_refmaxcounts[0])[1]
    else:  ## i.e., "average" or "shortest" or None
        result["reflen"] = reflen_refmaxcounts[0]

    result["testlen"] = testlen

    result["guess"] = [max(0, testlen - k + 1) for k in xrange(1, n + 1)]

    result['correct'] = [0] * n
    for (ngram, count) in counts.items():
        result["correct"][len(ngram) - 1] += min(
            reflen_refmaxcounts[1].get(ngram, 0), count)

    return result
Code example #8
def generate_data_table_from_xlsx(path):
    book = xlrd.open_workbook(path)
    sheet = book.sheet_by_index(1)
    data = []
    print("reading file...")
    for row_index in xrange(0, sheet.nrows):
        if row_index == 0:
            continue
        row = [sheet.cell(row_index, col_index).value for col_index in xrange(0, sheet.ncols)]
        postal_code = format_postcode(row[0].strip())
        geo_data = get_geoloc_data(postal_code)
        row.extend(geo_data)
        data.append(tuple(row))

    print("finished reading. start appending SQL database...")
    insert_record_to_sql_table("final_data", data)
    sql_db_flush()
Code example #9
 def computeKernelMatrix(self, data1, data2, symmetric=False):
     """
     Computes the kernel matrix
     """
     logging.debug("Starting RBF Kernel Matrix Computation...")
     self._data1 = mat(data1)
     self._data2 = mat(data2)
     assert self._data1.shape[1] == (self._data2.T).shape[0]
     self._dim1 = len(data1)
     self._dim2 = len(data2)
     self._symmetric = symmetric
     self.__km = None
     try:
         if self._symmetric:
             linearkm = self._data1 * self._data2.T
             trnorms = mat(np.diag(linearkm)).T
             trace_matrix = trnorms * mat(
                 np.ones((1, self._dim1), dtype=float64))
             self.__km = trace_matrix + trace_matrix.T
             self.__km = self.__km - 2 * linearkm
             self.__km = -self.__sigma_squared_inv * self.__km
             self.__km = np.exp(self.__km)
             return self.__km
         else:
             m = self._data1.shape[0]
             n = self._data2.shape[0]
             assert self._data1.shape[1] == self._data2.shape[1]
             linkm = mat(self._data1 * self._data2.T)
             trnorms1 = []
             for i in xrange(m):
                 trnorms1.append((self._data1[i] * self._data1[i].T)[0, 0])
             trnorms1 = mat(trnorms1).T
             trnorms2 = []
             for i in xrange(n):
                 trnorms2.append((self._data2[i] * self._data2[i].T)[0, 0])
             trnorms2 = mat(trnorms2).T
             self.__km = trnorms1 * mat(np.ones((n, 1), dtype=float64)).T
             self.__km = self.__km + mat(np.ones(
                 (m, 1), dtype=float64)) * trnorms2.T
             self.__km = self.__km - 2 * linkm
             self.__km = -self.__sigma_squared_inv * self.__km
             self.__km = np.exp(self.__km)
             return self.__km
     except Exception as e:
         logging.error("Error while computing kernel matrix: " + str(e))
         sys.exit()
Code example #10
def discount_rewards(r):
  """ take 1D float array of rewards and compute discounted reward """
  discounted_r = np.zeros_like(r)
  running_add = 0
  for t in reversed(xrange(0, r.size)):
    if r[t] != 0: running_add = 0 # reset the sum, since this was a game boundary (pong specific!)
    running_add = running_add * gamma + r[t]
    discounted_r[t] = running_add
  return discounted_r
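
A quick illustration; gamma is a module-level discount factor in the original script, so the value used here is an assumption.

import numpy as np

gamma = 0.99  # assumed discount factor
r = np.array([0., 0., 1., 0., 0., -1.], dtype=np.float32)
print(discount_rewards(r))
# each reward is propagated backwards with factor gamma, and the running sum
# is reset at every nonzero reward (a game boundary)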
Code example #11
def EM(So: float, mu: float, sigma: float, N: int, M=1):
    b = Brownian(N)[1]
    dt = M * (1 / N)  # EM step size
    L = N / M
    wi = [So]
    for i in range(0, int(L)):
        Winc = np.sum(b[(M * (i - 1) + M):(M * i + M)])
        w_i_new = wi[i] + mu * wi[i] * dt + sigma * wi[i] * Winc
        wi.append(w_i_new)
    return wi, dt
Code example #12
def GBM(So: float, mu: float, sigma: float, N: float, T=1.) -> list:
    W = Brownian(N)[0]
    t = np.linspace(0., 1., int(N) + 1)
    S = []
    S.append(So)
    for i in range(1, int(N + 1)):
        drift = (mu - 0.5 * sigma**2) * t[i]
        diffusion = sigma * W[i - 1]
        S_temp = So * np.exp(drift + diffusion)
        S.append(S_temp)
    return S, t
Code example #13
File: K-Means_new.py  Project: Sherry280/pythonCode
def show(dataSet, k, centroids, clusterAssment):
    # inspect the dimensions of the data set: number of samples (rows) and features (columns)
    numSamples, dim = dataSet.shape
    mark = ['or', 'ob', 'og', 'ok', '^r', '+r', 'sr', 'dr', '<r', 'pr']
    for i in range(numSamples):
        markIndex = int(clusterAssment[i, 0])
        plt.plot(dataSet[i, 0], dataSet[i, 1], mark[markIndex])
    mark = ['Dr', 'Db', 'Dg', 'Dk', '^b', '+b', 'sb', 'db', '<b', 'pb']
    for i in range(k):
        plt.plot(centroids[i, 0], centroids[i, 1], mark[i], markersize=12)
    plt.show()
Code example #14
def check_possibility(nums):
    """
    :type nums: List[int]
    :rtype: bool
    """
    p = None
    for i in range(len(nums) - 1):
        if nums[i] > nums[i + 1]:
            # more than one "dip" can never be fixed with a single change
            if p is not None:
                return False
            p = i
    return (p is None or p == 0 or p == len(nums) - 2
            or nums[p - 1] <= nums[p + 1] or nums[p] <= nums[p + 2])
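
Example usage, checking whether an array can be made non-decreasing by modifying at most one element (inputs are illustrative):

print(check_possibility([4, 2, 3]))  # True  (lower the 4)
print(check_possibility([4, 2, 1]))  # False (two elements would have to change)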
Code example #15
    def computeKernelMatrix(self, data1, data2, symmetric=False):
        """
        Computes the kernel matrix
        """
        logging.debug("Starting Linear Kernel Matrix Computation...")
        self._data1 = data1
        self._data2 = data2
        self._dim1 = len(data1)
        self._dim2 = len(data2)
        self._symmetric = symmetric
        self.__km = None
        try:
            km = mat(zeros((self._dim1, self._dim2), dtype=float64))
            if self._symmetric:
                for i in xrange(self._dim1):
                    message = 'Kernel Matrix Progress: %dx%d/%dx%d' % (
                        i, self._dim2, self._dim1, self._dim2)
                    logging.debug(message)
                    for j in xrange(i, self._dim2):
                        val = self.getKernelValue(self._data1[i],
                                                  self._data2[j])
                        km[i, j] = val
                        km[j, i] = val
                return km
            else:
                for i in xrange(self._dim1):
                    message = 'Kernel Matrix Progress: %dx%d/%dx%d' % (
                        i, self._dim2, self._dim1, self._dim2)
                    logging.debug(message)
                    for j in xrange(0, self._dim2):
                        val = self.getKernelValue(self._data1[i],
                                                  self._data2[j])
                        km[i, j] = val
                return km

        except Exception as e:
            logging.error("Error while computing kernel matrix: " + str(e))
            sys.exit()
        logging.debug("Kernel Matrix computed...")
Code example #16
def main(args):
    parser = argparse.ArgumentParser(
        description="""Generate one or more JSON objects containing random data
given an input json-schema.org compatible schema specification""")
    parser.add_argument('schemaFile',
                        metavar='file',
                        help='json-schema.org schema file to use')
    parser.add_argument('--count',
                        default=1,
                        type=int,
                        help='number of objects to create (default: 1)')
    parser.add_argument('--mode',
                        choices=['pure', 'mongo', 'full'],
                        default="mongo",
                        help="""\
format of non-string data to emit.  pure is normal JSON; dates are emitted
as ISO8601 strings and numbers are just numbers.  mongo is mongoDB-compatible
where dates are emitted as a special map {"$date": millis}.  mongoimport is
sensitive to maps with $ keys and will process the content as the indicated type.
full is a superset of mongoDB types.  Integers are emitted as {"$int": value},
floats as {"$float": value}.  mongoimport does not permit this -- but
pymonimport does.""")
    parser.add_argument('--defaultStringIpsum',
                        choices=['word', 'sentence', 'paragraph', 'fname'],
                        default="word",
                        help="""\
default style of string to emit when presented with type:string.""")

    rargs = parser.parse_args()

    fname = rargs.schemaFile
    count = rargs.count
    fp = open(fname)

    try:
        schema = json.load(fp)

        params = {
            "mode": rargs.mode,
            "defaultStringIpsum": rargs.defaultStringIpsum
        }

        q = Ipsum.Ipsum(params)

        for i in xrange(count):
            z = q.createItem(schema)
            print(json.dumps(z))
    except ValueError:
        tb = traceback.format_exc()
        print(tb)
Code example #17
    def makeIpsum(self, ipsum):
        style = self.dsi  # default
        s = None

        if ipsum is not None:
            style = ipsum

        if style == "sentence":
            n = self.randomInt(10, 20)
            s = ' '.join([self.randomFrom(self.bleck) for num in xrange(n)])

        elif style == "paragraph":
            n = self.randomInt(10, len(self.bleck))
            s = ' '.join([self.randomFrom(self.bleck) for num in xrange(n)])

        elif style == "word":
            s = self.randomFrom(self.bleck)

        elif style == "fname":
            s = self.randomFrom(self.fnames)

        elif style == "id":
            s = str(uuid.uuid4())

        elif style == "bson:ObjectId" or style == "bson:7":
            v = self.generateMongoOID()

            if self.mode == self.PURE_JSON:
                s = v
            else:
                # oooo   not a string, but a dict!
                s = {"$oid": v}

        else:
            s = "unknown_ipsum \"" + style + "\""

        return s
Code example #18
def centroid_centrality(graph: Graph):
    pathm = []
    adjm = graph.get_adjacency().data
    for v1 in range(0, len(graph.vs)):
        temp = []
        for v2 in range(0, len(graph.vs)):
            if adjm[v1][v2] > 0:
                temp.append(graph.shortest_paths(source=v1, target=v2)[0][0])
            else:
                temp.append(0)
        pathm.append(temp)

    fm = np.zeros((len(graph.vs), len(graph.vs)))
    for v1 in range(0, len(graph.vs)):
        for v2 in range(0, len(graph.vs)):
            # count of vertices closer to v1 minus count of vertices closer to v2
            fm[v1][v2] = len(
                [x for x in zip(pathm[v1], pathm[v2]) if x[0] < x[1]]
            ) - len([x for x in zip(pathm[v1], pathm[v2]) if x[0] > x[1]])

    score_dict = {}
    for v in graph.vs:
        score_dict[v] = min(fm[v.index])

    return sorted(score_dict.items(), key=lambda x: x[1], reverse=True)
Code example #19
 def __optimize(self):
     logging.debug("Starting optimization with BFGS ...")
     self.__needed_function_calls = 0
     # starting_point
     c_current = zeros(self.__dim, float64)
     c_current[self.__dim - 1] = self.__b
     # Annealing sequence.
     for i in xrange(len(self.__lam_Uvec)):
         self.__lamU = self.__lam_Uvec[i]
         # crop one dimension (in case the offset b is fixed)
         c_current = c_current[:self.__dim - 1]
         c_current = self.__localSearch(c_current)
         # reappend it if needed
         c_current = np.append(c_current, self.__b)
     f_opt = self.__getFitness(c_current)
     return c_current, f_opt
Code example #20
File: lcs.py  Project: workprinond/Anti-Alignment
    def longestcommonsubsequence(A, B):

        x = len(A)
        y = len(B)

        P = [[None] * (y + 1) for i in range(x + 1)]

        for i in range(x + 1):
            for j in range(y + 1):
                if i == 0 or j == 0:
                    P[i][j] = 0
                elif A[i - 1] == B[j - 1]:
                    P[i][j] = P[i - 1][j - 1] + 1
                else:
                    P[i][j] = max(P[i - 1][j], P[i][j - 1])

        return P[x][y]
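
A small check of the recurrence (illustrative inputs; in the project the function sits inside a class, so it is assumed here to be reachable as a plain function):

print(longestcommonsubsequence("ABCBDAB", "BDCABA"))  # 4, e.g. "BCAB"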
Code example #21
def GBM(So: float, mu: float, sigma: float, N: float) -> list:
    """[summary]
    Arguments:
        So {float} -- initial stock price
        mu {float} -- mean of historical daily returns
        sigma {float} -- standard deviation of historical daily returns
        N {float} -- number of time points in prediction the time horizon
    
    Keyword Arguments:
        T {[type]} -- length of the prediction time horizon (default: {1.})
    
    Returns:
        list -- [description]
    """
    W = Brownian(N)[0]
    t = np.linspace(0., 1., int(N) + 1)
    S = []
    S.append(So)
    for i in range(1, int(N + 1)):
        drift = (mu - 0.5 * sigma**2) * t[i]
        diffusion = sigma * W[i - 1]
        S_temp = So * np.exp(drift + diffusion)
        S.append(S_temp)
    return S, t
Code example #22
def chunks(lst, n):
    """Yield successive n-sized chunks from lst."""
    for i in xrange(0, len(lst), n):
        yield lst[i:i + n]
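
For example:

print(list(chunks([1, 2, 3, 4, 5, 6, 7], 3)))  # [[1, 2, 3], [4, 5, 6], [7]]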
Code example #23
    def makeThing(self, path, info):
        type = info["type"]

        if type == "null":
            o = "null"

        elif type == "string":
            fmt = None
            v = None

            if "format" in info:
                fmt = info['format']

            # date-time is special.  It is good to have optimizations
            # because running the parser over and over again on the
            # string rep of a date is very very slow...
            if "enum" in info:
                if fmt == "date-time":
                    if '_dateEnums' not in info:
                        info['_dateEnums'] = [
                            self.str2Epoch(ss) for ss in info['enum']
                        ]

                    v = self.randomFrom(info['_dateEnums'])  # pick
                else:
                    v = self.randomFrom(info['enum'])  # v is no longer None

            if fmt == "date-time":
                if v is not None:  # must have been an enum; parse!
                    epoch = v  #self.str2Epoch(v)
                else:
                    if 'ipsum' in info:  # if we have ipsum...
                        q = info['ipsum']
                        if 'inc' in q:  # ...AND we have inc then OK!
                            q2 = q['inc']  #i.e. { "start": 0, "val": 1 }
                            if path not in self.counters:
                                epoch = self.str2Epoch(
                                    q2['start'])  # expensive
                                self.counters[path] = epoch
                            else:
                                if 'secs' in q2:
                                    v2 = q2['secs']
                                if 'mins' in q2:
                                    v2 = q2['mins'] * 60
                                if 'hrs' in q2:
                                    v2 = q2['hrs'] * 60 * 60
                                if 'days' in q2:
                                    v2 = q2['days'] * 60 * 60 * 24

                                v2 *= self.millisAdj

                                self.counters[path] += v2

                            epoch = self.counters[path]

                    # was no ipsum or no ipsum.inc...
                    else:
                        # try for min and max.  To avoid running the expensive
                        # parse over and over, look for _min and _max (which only
                        # we can create).  Don't believe it?  Try commenting out
                        # the assignments below (lines ending with #tag1) and
                        # rerun.  It's almost 3x (300%) faster when you parse and
                        # save the value....
                        mmin = self.lowDateEpoch
                        mmax = self.highDateEpoch

                        if '_min' in info:
                            mmin = info['_min']
                        else:
                            if 'minimum' in info:
                                mmin = self.str2Epoch(
                                    info['minimum'])  # expensive
                                info['_min'] = mmin  #tag1

                        if '_max' in info:
                            mmax = info['_max']
                        else:
                            if 'maximum' in info:
                                mmax = self.str2Epoch(
                                    info['maximum'])  # expensive
                                info['_max'] = mmax  #tag1

                        epoch = self.randomLong(mmin, mmax)

                if self.mode == self.FULL_EXT_JSON or self.mode == self.MONGO_JSON:
                    o = {"$date": epoch}
                elif self.mode == self.RAW:
                    o = datetime.datetime.fromtimestamp(epoch)
                else:
                    o = datetime.datetime.fromtimestamp(epoch).strftime(
                        '%Y-%m-%dT%H:%M:%S')

            elif v is None:  # not date-time and not enum
                if fmt is not None:
                    # format takes precedence over ipsum field:
                    o = self.makeFormattedString(fmt)
                else:
                    t = info['ipsum'] if 'ipsum' in info else None
                    o = self.makeIpsum(t)

            else:
                o = v

        elif type == "object":
            ss = info["properties"]
            nn = {}
            self.processObject(nn, path, ss)
            o = nn

        elif type == "array":
            ss = info["items"]
            mmin = ss['minItems'] if 'minItems' in ss else self.DEF_MIN_ARR_LEN
            mmax = ss['maxItems'] if 'maxItems' in ss else self.DEF_MAX_ARR_LEN

            # List comprehensions front and center....
            o = [
                self.makeThing(path + "." + str(i), ss)
                for i in xrange(self.randomInt(mmin, mmax))
            ]

        elif type == "oneOf":
            ll = info["items"]  # A list, not a dict!
            x = self.randomFrom(ll)  # pick one and go!
            o = self.makeThing(path, x)

        elif type == "number" or type == "integer":
            v = None

            if "enum" in info:
                v = self.randomFrom(info['enum'])  # v is no longer None

            elif "ipsum" in info:
                q = info['ipsum']
                if 'inc' in q:
                    q2 = q['inc']  #i.e. { "start": 0, "val": 1 }
                    if path not in self.counters:
                        self.counters[path] = q2['start']
                    else:
                        self.counters[path] += q2['val']
                    v = self.counters[path]

            if v is None:
                mmin = info['minimum'] if 'minimum' in info else -100
                mmax = info['maximum'] if 'maximum' in info else 100
                if type == "number":
                    v = self.randomDouble(mmin, mmax)

                if type == "integer":
                    v = self.randomInt(mmin, mmax)

            # At this point, we have SOME kind of v!
            if self.mode == self.FULL_EXT_JSON:
                if type == "number":
                    o = {"$float": v}

                if type == "integer":
                    o = {"$int": v}

            else:
                o = v

        elif type == "boolean":
            v = None

            if "enum" in info:
                q = str(self.randomFrom(info['enum']))  # Force to str....
                v = q.lower() in ("yes", "true", "t", "1")
                # v is no longer None but a bool

            if v is None:
                v = True if self.randomDouble(0, 1) > .5 else False

            o = v

        return o
Code example #24
    def __factor_matrix(self, R, K, alpha, steps, beta, error_limit):
        """
        R = user x product matrix
        K = latent features count (how many features we think the model should derive)
        alpha = learning rate
        beta = regularization penalty (minimize over/under fitting)
        steps = maximum number of gradient descent iterations
        error_limit = algorithm stops once the error falls below this level

        Returns:
        P = User x features matrix. (How strongly a user is associated with a feature)
        Q = Product x feature matrix. (How strongly a product is associated with a feature)
        To predict, use dot product of P, Q
        """
        # Transform regular array to numpy array
        R = numpy.array(R)

        # Generate P - N x K
        # Use random values to start. Best performance
        N = len(R)
        M = len(R[0])
        P = numpy.random.rand(N, K)

        # Generate Q - M x K
        # Use random values to start. Best performance
        Q = numpy.random.rand(M, K)
        Q = Q.T

        error = 0

        # iterate through max # of steps
        for step in xrange(steps):

            # iterate each cell in r
            for i in xrange(len(R)):
                for j in xrange(len(R[i])):
                    if R[i][j] > 0:

                        # get the eij (error) side of the equation
                        eij = R[i][j] - numpy.dot(P[i, :], Q[:, j])

                        for k in xrange(K):
                            # (*update_rule) update pik_hat
                            P[i][k] = P[i][k] + alpha * (2 * eij * Q[k][j] -
                                                         beta * P[i][k])

                            # (*update_rule) update qkj_hat
                            Q[k][j] = Q[k][j] + alpha * (2 * eij * P[i][k] -
                                                         beta * Q[k][j])

            # Measure error
            error = self.__error(R, P, Q, K, beta)

            # Terminate when we converge
            if error < error_limit:
                break

        # track Q, P (learned params)
        # Q = Products x feature strength
        # P = Users x feature strength
        self.Q = Q.T
        self.P = P

        self.__print_fit_stats(error, N, M)
Code example #25
with tf.name_scope("train_op_"):
  train_op = tf.train.AdamOptimizer(learning_rate=2.**-5).minimize(loss)
with tf.name_scope("mse_"):
  mse = tf.reduce_mean(tf.squared_difference(y, bernoulli.mean()))
with tf.name_scope("init_op_"):
  init_op = tf.global_variables_initializer()

# Run the graph num_steps (2000) times.
with tf.name_scope("train_100_"):
  num_steps = 2000
  loss_ = np.zeros(num_steps)   # Style: `_` to indicate sess.run result.
  mse_ = np.zeros(num_steps)

with tf.Session() as sess:
  sess.run(init_op)
  for it in xrange(loss_.size):
    _, loss_[it], mse_[it] = sess.run([train_op, loss, mse])
    if it % 200 == 0 or it == loss_.size - 1:
      print("iteration:{}  loss:{}  mse:{}".format(it, loss_[it], mse_[it]))

# sess.run(tf.global_variables_initializer())
writer = tf.summary.FileWriter(LOGDIR)
writer.add_graph(sess.graph)
saver = tf.train.Saver()




# ==> iteration:0    loss:0.635675370693  mse:0.222526371479
#     iteration:200  loss:0.440077394247  mse:0.143687799573
#     iteration:400  loss:0.440077394247  mse:0.143687844276
Code example #26
def compute_svd_pca():
    global W, H
    face_width, face_height = (W, H)

    # Create a vector for all faces
    face_vector = np.array([
        cv2.imread(os.path.join(face_database_gray_file, filename),
                   0).flatten() for filename in f_list
    ])

    # Compute average face
    fave = np.mean(face_vector, 0)

    # Subtract the average face from each image before performing SVD and PCA
    X = face_vector - fave

    print("Finding SVD of data matrix")
    # Decompose the mean-centered matrix into three parts

    U, S, Vt = np.linalg.svd(X.transpose(), full_matrices=False)
    V = Vt.T

    # Sort principal components by descending order of the singular values
    ind = np.argsort(S)[::-1]
    U, S, V = U[:, ind], S[ind], V[:, ind]
    eigenfaces = U

    # Print Dimensions
    print("face_vector:", face_vector.shape)
    print("U:", U.shape)
    print("Sigma:", S.shape)
    print("V^T:", Vt.shape)

    # Weights is an n x n matrix
    weights = np.dot(X, eigenfaces)  # TODO: Maybe swap + .T to eigenfaces

    # Some intermediate save:
    save_average_face = True
    if save_average_face:
        # Save average face
        average_face = fave.reshape(face_width, face_height)
        cv2.imwrite(os.path.join(intermediate_file, 'average_face.jpg'),
                    average_face)

    save_eigenvectors = False
    if save_eigenvectors:
        print("Saving eigenvectors...")
        for i in xrange(n):
            f_name = os.path.join(intermediate_file, 'eigenvector_%s.png' % i)
            im = U[:, i].reshape(face_width, face_height)
            cv2.imwrite(f_name, im)

    save_reconstructed = True
    if save_reconstructed:
        k = 30
        print(
            '\n',
            'Save the reconstructed images based on only "%s" eigenfaces' % k)
        for img_id in range(n):
            # for k ranging from 1 to total + 1:
            reconstructed_face = fave + np.dot(weights[img_id, :k],
                                               eigenfaces[:, :k].T)
            reconstructed_face.shape = (
                face_width, face_height
            )  # transform vector to initial image size
            cv2.imwrite(
                os.path.join(intermediate_file,
                             'img_reconstr_%s_k=%s.png' % (f_list[img_id], k)),
                reconstructed_face)

    # Projected training images into PCA subspace as yn=weights or Yn = E.T * (Xn - average_face)
    training_proj = weights
    average_face_flatten = fave

    return training_proj, eigenfaces, average_face_flatten
Code example #27
 def genQRToken(self, qrsig):
     e = 0
     for i in xrange(0, len(qrsig)):
         e += (e << 5) + ord(qrsig[i])
     qrtoken = (e & 2147483647)
     return str(qrtoken)
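
The same rolling hash as a standalone sketch, for illustration; the input string below is made up.

def qr_token(qrsig):
    # 33-based rolling hash over the characters, masked to a positive 31-bit value
    e = 0
    for ch in qrsig:
        e += (e << 5) + ord(ch)
    return str(e & 2147483647)

print(qr_token("example-qrsig"))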
Code example #28
File: bleu_scorer.py  Project: Jackcong1/ECMRC
    def compute_score(self, option=None, verbose=0):
        n = self.n
        small = 1e-9
        tiny = 1e-15  ## so that if guess is 0 still return 0
        bleu_list = [[] for _ in range(n)]

        if self._score is not None:
            return self._score

        if option is None:
            option = "average" if len(self.crefs) == 1 else "closest"

        self._testlen = 0
        self._reflen = 0
        totalcomps = {
            'testlen': 0,
            'reflen': 0,
            'guess': [0] * n,
            'correct': [0] * n
        }

        # for each sentence
        for comps in self.ctest:
            testlen = comps['testlen']
            self._testlen += testlen

            if self.special_reflen is None:  ## need computation
                reflen = self._single_reflen(comps['reflen'], option, testlen)
            else:
                reflen = self.special_reflen

            self._reflen += reflen

            for key in ['guess', 'correct']:
                for k in xrange(n):
                    totalcomps[key][k] += comps[key][k]

            # append per image bleu score
            bleu = 1.
            for k in xrange(n):
                bleu *= (float(comps['correct'][k]) + tiny) \
                        /(float(comps['guess'][k]) + small)
                bleu_list[k].append(bleu**(1. / (k + 1)))
            ratio = (testlen + tiny) / (reflen + small
                                        )  ## N.B.: avoid zero division
            if ratio < 1:
                for k in xrange(n):
                    bleu_list[k][-1] *= math.exp(1 - 1 / ratio)

            if verbose > 1:
                print(comps, reflen)

        totalcomps['reflen'] = self._reflen
        totalcomps['testlen'] = self._testlen

        bleus = []
        bleu = 1.
        for k in xrange(n):
            bleu *= float(totalcomps['correct'][k] + tiny) \
                    / (totalcomps['guess'][k] + small)
            bleus.append(bleu**(1. / (k + 1)))
        ratio = (self._testlen + tiny) / (self._reflen + small
                                          )  ## N.B.: avoid zero division
        if ratio < 1:
            for k in xrange(n):
                bleus[k] *= math.exp(1 - 1 / ratio)

        if verbose > 0:
            print(totalcomps)
            print("ratio:", ratio)

        self._score = bleus
        return self._score, bleu_list
Code example #29
 def genBKN(self, skey):
     b = 5381
     for i in xrange(0, len(skey)):
         b += (b << 5) + ord(skey[i])
     bkn = (b & 2147483647)
     return str(bkn)
Code example #30
def generate_student_migation_data_table_from_xlsx(path):
    print('generate_student_migation_data_table_from_xlsx')
    book = xlrd.open_workbook(path)
    sheet = book.sheet_by_index(0)
    data = []
    region_of_he_provider_list = []
    domicile_list = []
    geolocator = Nominatim(user_agent=__name__)

    print("reading file...")
    for row_index in xrange(0, sheet.nrows):
        if row_index < 18:
            continue
        row = [sheet.cell(row_index, col_index).value for col_index in xrange(0, sheet.ncols)]
        a_way_domicile = row[0]
        domicile = row[1]
        domicile_list.append(clean(domicile))
        level_of_study = row[2]
        mode_of_study = row[3]
        academic_year = int(row[4].split('/')[0])
        region_of_he_provider = row[5]
        if 'Total England' in region_of_he_provider:
            region_of_he_provider = 'England'

        if 'Total United Kingdom' in region_of_he_provider:
            region_of_he_provider = 'United Kingdom'
        region_of_he_provider_list.append(region_of_he_provider)
        number = int(row[6])
        data.append([a_way_domicile, domicile, level_of_study, mode_of_study, academic_year, region_of_he_provider, number])

    region_of_he_provider_list = list(set(region_of_he_provider_list))
    region_of_he_provider_list.sort()
    domicile_list = list(set(domicile_list))
    domicile_list.sort()

    with open("geolocator.data") as f:
        content = f.readlines()
    latitudes_longitudes_dom = [json.loads(x.strip()) for x in content][0]

    for place in domicile_list:
        print(place)
        if place not in latitudes_longitudes_dom:
            location = geolocator.geocode(place, timeout=None)
            latitudes_longitudes_dom[place] = {'lat': location.latitude, 'long': location.longitude}
            print('cache...')
            with open('geolocator.data', 'w') as outfile:
                outfile.write(json.dumps(latitudes_longitudes_dom))
            time.sleep(1.1)
        else:
            print(place, latitudes_longitudes_dom[place])
            # get_geoloc_data_migration(place)

    latitudes_longitudes_he = {}
    for place in region_of_he_provider_list:
        if 'Total England' in place:
            place = 'England'

        if 'Total United Kingdom' in place:
            place = 'United Kingdom'

        # get_geoloc_data_migration(place)
        location = geolocator.geocode(place + ' , UK', timeout=None)
        print(place, '-', location)
        latitudes_longitudes_he[place] = {'lat': location.latitude, 'long': location.longitude}
        time.sleep(1.1)

    final_data = []
    for item in data:
        place = item[5]
        if 'Total England' in place:
            place = 'England'
        if 'Total United Kingdom' in place:
            place = 'United Kingdom'
        dom_c = latitudes_longitudes_dom[clean(item[1])]
        item.append(dom_c['lat'])
        item.append(dom_c['long'])
        he_c = latitudes_longitudes_he[clean(place)]
        item.append(he_c['lat'])
        item.append(he_c['long'])
        item[1] = clean(item[1])
        if 'Caribbean' in item[1]:
            print(item)
        final_data.append(tuple(item))


    print("finished reading. start appending SQL database...")
    insert_record_to_sql_table_student_migration("student_migration", final_data)
    sql_db_flush()